Compare commits

..

24 Commits

Author SHA1 Message Date
ParthSareen
a4265c278a wip 2025-03-24 11:02:02 -07:00
ParthSareen
ffd6428758 err handling + fixing scope issue 2025-02-11 19:29:48 -08:00
ParthSareen
aa6d5151df wip with json stuff and cleanup 2025-02-11 16:40:40 -08:00
ParthSareen
25edfa6fdb first pass so working 2025-02-04 22:09:22 -08:00
ParthSareen
cc2e44b885 Improved sampler interface 2025-02-03 16:05:15 -08:00
ParthSareen
d5f8670f0a WIP simple SO working 2025-02-03 11:40:06 -08:00
ParthSareen
524029cd6d tested with so 2025-01-31 17:12:39 -08:00
ParthSareen
b973dedb4b saved state 2025-01-30 15:17:42 -08:00
ParthSareen
c56a8b7749 wip 2025-01-30 15:05:25 -08:00
ParthSareen
198fde82aa Enable array type json 2025-01-29 14:54:39 -08:00
ParthSareen
77f709ebd5 cleanup state machine 2025-01-29 14:28:02 -08:00
ParthSareen
73098a2973 WIP working with other stuff 2025-01-29 10:28:22 -08:00
ParthSareen
e93db4d20e WIP 2025-01-27 16:33:55 -08:00
ParthSareen
a2a73ce5e0 wip! 2025-01-23 20:21:50 -08:00
ParthSareen
6ba557f25b checkpoint 2025-01-23 09:46:14 -08:00
ParthSareen
a7c8cc06da json checkpoint 2025-01-21 17:48:12 -08:00
Blake Mizerany
089bbb537d grammar: introduce new grammar package
This package provides a way to convert JSON schemas to equivalent EBNF.
It is intended to be a replacement to llama.cpp's schema_to_grammar.

This is still an early version and does not yet support all JSON schema
features. The to-do list includes:

- minumum/maximum constraints on integer types
- minLength/maxLength constraints on string types
- defs and refs
2025-01-20 14:40:50 -08:00
ParthSareen
7cd9fbbbb1 improve temperature sampler 2025-01-20 13:53:07 -08:00
ParthSareen
b91487f289 Working json sampler 2025-01-15 14:08:54 -08:00
ParthSareen
5b19d4941a addressing comments + cleanup 2025-01-14 16:23:48 -08:00
ParthSareen
5e73f24e16 sampling package 2025-01-14 16:23:38 -08:00
Michael Yang
4aac178cac update ci 2025-01-14 14:25:56 -08:00
Michael Yang
800e21d060 sort devices by type 2025-01-14 14:25:56 -08:00
Michael Yang
5827999e9e next ollama runner
implement llama and mllama model architectures in go using ggml (through
cgo)
2025-01-14 14:25:56 -08:00
631 changed files with 12478 additions and 11978 deletions

9
.gitattributes vendored
View File

@@ -7,5 +7,14 @@ llama/**/*.cuh linguist-vendored
llama/**/*.m linguist-vendored
llama/**/*.metal linguist-vendored
ml/backend/**/*.c linguist-vendored
ml/backend/**/*.h linguist-vendored
ml/backend/**/*.cpp linguist-vendored
ml/backend/**/*.hpp linguist-vendored
ml/backend/**/*.cu linguist-vendored
ml/backend/**/*.cuh linguist-vendored
ml/backend/**/*.m linguist-vendored
ml/backend/**/*.metal linguist-vendored
* text=auto
*.go text eol=lf

View File

@@ -1,11 +1,5 @@
name: test
env:
ROCM_WINDOWS_URL: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe
MSYS2_URL: https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe
CUDA_12_WINDOWS_URL: https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe
CUDA_12_WINDOWS_VER: 12.4
concurrency:
# For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
# cancels running CI jobs and starts all new ones.
@@ -27,7 +21,7 @@ jobs:
changes:
runs-on: ubuntu-latest
outputs:
RUNNERS: ${{ steps.changes.outputs.RUNNERS }}
changed: ${{ steps.changes.outputs.changed }}
steps:
- uses: actions/checkout@v4
with:
@@ -35,309 +29,61 @@ jobs:
- id: changes
run: |
changed() {
git diff-tree -r --no-commit-id --name-only \
$(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
${{ github.event.pull_request.head.sha }} \
local BASE=${{ github.event.pull_request.base.sha }}
local HEAD=${{ github.event.pull_request.head.sha }}
local MERGE_BASE=$(git merge-base $BASE $HEAD)
git diff-tree -r --no-commit-id --name-only "$MERGE_BASE" "$HEAD" \
| xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
}
{
echo RUNNERS=$(changed 'llama/**')
} >>$GITHUB_OUTPUT
echo changed=$(changed 'llama/llama.cpp/**' 'ml/backend/ggml/ggml/**') | tee -a $GITHUB_OUTPUT
runners-linux-cuda:
linux:
needs: [changes]
if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
if: ${{ needs.changes.outputs.changed == 'True' }}
strategy:
matrix:
cuda-version:
- '11.8.0'
runs-on: linux
container: nvidia/cuda:${{ matrix.cuda-version }}-devel-ubuntu20.04
include:
- container: nvidia/cuda:11.8.0-devel-ubuntu22.04
preset: CUDA
- container: rocm/dev-ubuntu-22.04:6.1.2
preset: ROCm
extra-packages: rocm-libs
runs-on: ubuntu-latest
container: ${{ matrix.container }}
steps:
- uses: actions/checkout@v4
- run: |
apt-get update && apt-get install -y git build-essential curl
apt-get update
apt-get install -y cmake pkg-config ${{ matrix.extra-packages }}
env:
DEBIAN_FRONTEND: noninteractive
- uses: actions/checkout@v4
- uses: actions/setup-go@v4
with:
go-version-file: go.mod
cache: true
- run: go get ./...
- run: |
git config --global --add safe.directory /__w/ollama/ollama
cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
make -j $cores cuda_v11
runners-linux-rocm:
needs: [changes]
if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
strategy:
matrix:
rocm-version:
- '6.1.2'
runs-on: linux
container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
steps:
- run: |
apt-get update && apt-get install -y git build-essential curl rocm-libs
env:
DEBIAN_FRONTEND: noninteractive
- uses: actions/checkout@v4
- uses: actions/setup-go@v4
with:
go-version-file: go.mod
cache: true
- run: go get ./...
- run: |
git config --global --add safe.directory /__w/ollama/ollama
cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
make -j $cores rocm
cmake --preset ${{ matrix.preset }}
cmake --build --preset ${{ matrix.preset }} --parallel
# ROCm generation step
runners-windows-rocm:
needs: [changes]
if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
runs-on: windows
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache: true
- name: Set make jobs default
run: |
echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
# ROCM installation steps
- name: 'Cache ROCm installer'
id: cache-rocm
uses: actions/cache@v4
with:
path: rocm-install.exe
key: ${{ env.ROCM_WINDOWS_URL }}
- name: 'Conditionally Download ROCm'
if: steps.cache-rocm.outputs.cache-hit != 'true'
run: |
$ErrorActionPreference = "Stop"
Invoke-WebRequest -Uri "${env:ROCM_WINDOWS_URL}" -OutFile "rocm-install.exe"
- name: 'Install ROCm'
run: |
Start-Process "rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
- name: 'Verify ROCm'
run: |
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
echo "HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path | select -first 1)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
- name: Add msys paths
run: |
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install msys2 tools
run: |
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
- name: make rocm runner
run: |
import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
make -C llama print-HIP_PATH print-HIP_LIB_DIR
make rocm
# CUDA generation step
runners-windows-cuda:
needs: [changes]
if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
runs-on: windows
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache: true
- name: Set make jobs default
run: |
echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
# CUDA installation steps
- name: 'Cache CUDA installer'
id: cache-cuda
uses: actions/cache@v4
with:
path: cuda-install.exe
key: ${{ env.CUDA_12_WINDOWS_URL }}
- name: 'Conditionally Download CUDA'
if: steps.cache-cuda.outputs.cache-hit != 'true'
run: |
$ErrorActionPreference = "Stop"
Invoke-WebRequest -Uri "${env:CUDA_12_WINDOWS_URL}" -OutFile "cuda-install.exe"
- name: 'Install CUDA'
run: |
$subpackages = @("cudart", "nvcc", "cublas", "cublas_dev") | foreach-object {"${_}_${{ env.CUDA_12_WINDOWS_VER }}"}
Start-Process "cuda-install.exe" -ArgumentList (@("-s") + $subpackages) -NoNewWindow -Wait
- name: 'Verify CUDA'
run: |
& (resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0] --version
$cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
$cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2'
echo "$cudaPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "CUDA_PATH=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
echo "CUDA_PATH_V${cudaVer}=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
- name: Add msys paths
run: |
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install msys2 tools
run: |
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
- name: make cuda runner
run: |
import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
make cuda_v$(($env:CUDA_PATH | split-path -leaf) -replace 'v(\d+).*', '$1')
runners-cpu:
needs: [changes]
if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
arch: [amd64, arm64]
exclude:
- os: ubuntu-latest
arch: arm64
- os: windows-2019
arch: arm64
runs-on: ${{ matrix.os }}
env:
GOARCH: ${{ matrix.arch }}
ARCH: ${{ matrix.arch }}
CGO_ENABLED: '1'
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache: true
- name: Add msys paths
if: ${{ startsWith(matrix.os, 'windows-') }}
run: |
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install msys2 tools
if: ${{ startsWith(matrix.os, 'windows-') }}
run: |
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
- name: 'Build Windows Go Runners'
if: ${{ startsWith(matrix.os, 'windows-') }}
run: |
$gopath=(get-command go).source | split-path -parent
$gccpath=(get-command gcc).source | split-path -parent
import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
$env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
$env:PATH="$gopath;$gccpath;$env:PATH"
echo $env:PATH
if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
make -j 4
- name: 'Build Unix Go Runners'
if: ${{ ! startsWith(matrix.os, 'windows-') }}
run: make -j 4
- run: go build .
lint:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
arch: [amd64, arm64]
exclude:
- os: ubuntu-latest
arch: arm64
- os: windows-2019
arch: arm64
- os: macos-latest
arch: amd64
runs-on: ${{ matrix.os }}
env:
GOARCH: ${{ matrix.arch }}
CGO_ENABLED: '1'
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Add msys paths
if: ${{ startsWith(matrix.os, 'windows-') }}
run: |
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install msys2 tools
if: ${{ startsWith(matrix.os, 'windows-') }}
run: |
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache: false
- run: |
case ${{ matrix.arch }} in
amd64) echo ARCH=x86_64 ;;
arm64) echo ARCH=arm64 ;;
esac >>$GITHUB_ENV
shell: bash
- uses: golangci/golangci-lint-action@v6
with:
args: --timeout 10m0s -v
test:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
arch: [amd64]
exclude:
- os: ubuntu-latest
arch: arm64
- os: windows-2019
arch: arm64
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
env:
GOARCH: ${{ matrix.arch }}
CGO_ENABLED: '1'
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Add msys paths
if: ${{ startsWith(matrix.os, 'windows-') }}
run: |
echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install msys2 tools
if: ${{ startsWith(matrix.os, 'windows-') }}
run: |
Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache: true
- run: |
case ${{ matrix.arch }} in
amd64) echo ARCH=amd64 ;;
arm64) echo ARCH=arm64 ;;
esac >>$GITHUB_ENV
shell: bash
- uses: golangci/golangci-lint-action@v6
with:
args: --timeout 10m0s -v
- run: go test ./...
patches:
needs: [changes]
if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Verify patches carry all the changes
- name: Verify patches apply cleanly and do not change files
run: |
make apply-patches sync && git diff --compact-summary --exit-code llama
make -f Makefile2 clean checkout sync
git diff --compact-summary --exit-code

45
CMakeLists.txt Normal file
View File

@@ -0,0 +1,45 @@
cmake_minimum_required(VERSION 3.21)
project(Ollama C CXX)
include(CheckLanguage)
find_package(Threads REQUIRED)
set(CMAKE_BUILD_TYPE Release)
set(BUILD_SHARED_LIBS ON)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(GGML_BUILD ON)
set(GGML_SHARED ON)
set(GGML_CCACHE ON)
set(GGML_BACKEND_DL ON)
set(GGML_BACKEND_SHARED ON)
set(GGML_SCHED_MAX_COPIES 4)
set(GGML_CPU_ALL_VARIANTS ON)
set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
set(GGML_LLAMAFILE ON)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu/amx)
set(GGML_CPU ON)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
set_property(TARGET ggml PROPERTY EXCLUDE_FROM_ALL TRUE)
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)
endif()
check_language(HIP)
if(CMAKE_HIP_COMPILER)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)
endif()

96
CMakePresets.json Normal file
View File

@@ -0,0 +1,96 @@
{
"version": 3,
"configurePresets": [
{
"name": "Default",
"binaryDir": "${sourceDir}/build",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release"
}
},
{
"name": "CUDA",
"inherits": [ "Default" ]
},
{
"name": "CUDA 11",
"inherits": [ "CUDA" ],
"cacheVariables": {
"CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;62;70;72;75;80;86"
}
},
{
"name": "CUDA 12",
"inherits": [ "CUDA" ],
"cacheVariables": {
"CMAKE_CUDA_ARCHITECTURES": "60;61;62;70;72;75;80;86;87;89;90;90a"
}
},
{
"name": "JetPack 5",
"inherits": [ "CUDA" ],
"cacheVariables": {
"CMAKE_CUDA_ARCHITECTURES": "72;87"
}
},
{
"name": "JetPack 6",
"inherits": [ "CUDA" ],
"cacheVariables": {
"CMAKE_CUDA_ARCHITECTURES": "87"
}
},
{
"name": "ROCm",
"inherits": [ "Default" ]
},
{
"name": "ROCm 6",
"inherits": [ "ROCm" ],
"cacheVariables": {
"CMAKE_HIP_ARCHITECTURES": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
}
}
],
"buildPresets": [
{
"name": "Default",
"configurePreset": "Default"
},
{
"name": "CUDA",
"configurePreset": "CUDA",
"targets": [ "ggml-cuda" ]
},
{
"name": "CUDA 11",
"inherits": [ "CUDA" ],
"configurePreset": "CUDA 11"
},
{
"name": "CUDA 12",
"inherits": [ "CUDA" ],
"configurePreset": "CUDA 12"
},
{
"name": "JetPack 5",
"inherits": [ "CUDA" ],
"configurePreset": "JetPack 5"
},
{
"name": "JetPack 6",
"inherits": [ "CUDA" ],
"configurePreset": "JetPack 6"
},
{
"name": "ROCm",
"configurePreset": "ROCm",
"targets": [ "ggml-hip" ]
},
{
"name": "ROCm 6",
"inherits": [ "ROCm" ],
"configurePreset": "ROCm 6"
}
]
}

66
Dockerfile2 Normal file
View File

@@ -0,0 +1,66 @@
ARG CUDA_11_VERSION=11.3
ARG CUDA_12_VERSION=12.4
ARG ROCM_VERSION=6.1.2
ARG JETPACK_5_VERSION=r35.4.1
ARG JETPACK_6_VERSION=r36.2.0
ARG CMAKE_VERSION=3.31.2
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS base
ARG CMAKE_VERSION
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz | tar xz -C /usr --strip-components 1
RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \
&& yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
# FROM --platform=linux/arm64 rockylinux:8 AS base
# ARG CMAKE_VERSION
# RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
# RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
FROM base AS amd64
ARG CUDA_11_VERSION
ARG CUDA_12_VERSION
RUN yum install -y cuda-toolkit-${CUDA_11_VERSION//./-} \
&& yum install -y cuda-toolkit-${CUDA_12_VERSION//./-}
COPY CMakeLists.txt CMakeLists.txt
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
FROM --platform=linux/amd64 amd64 AS cuda_11
ENV PATH=/usr/local/cuda-${CUDA_11_VERSION}/bin:$PATH
RUN cmake -S . -B build -DCMAKE_CUDA_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
RUN cmake --build build --target ggml-cuda -j
FROM --platform=linux/amd64 amd64 AS cuda_12
ENV PATH=/usr/local/cuda-${CUDA_12_VERSION}/bin:$PATH
RUN cmake -S . -B build -DCMAKE_CUDA_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
RUN cmake --build build --target ggml-cuda -j
FROM --platform=linux/amd64 amd64 AS rocm
RUN cmake -S . -B build -DCMAKE_HIP_ARCHITECTURES="gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
RUN cmake --build build --target ggml-hip -j
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5_VERSION} AS jetpack_5
ARG CMAKE_VERSION
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
COPY CMakeLists.txt .
COPY ml/backend/ggml/ggml .
RUN cmake -S . -B build \
-DCMAKE_CUDA_ARCHITECTURES="72;87"
RUN cmake --build build --target ggml-cuda
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6_VERSION} AS jetpack_6
ARG CMAKE_VERSION
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
COPY CMakeLists.txt .
COPY ml/backend/ggml/ggml .
RUN cmake -S . -B build \
-DCMAKE_CUDA_ARCHITECTURES="87"
RUN cmake --build build --target ggml-cuda
FROM --platform=linux/amd64 golang:1.23
COPY --from=cuda_11 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-11.so
COPY --from=cuda_12 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-12.so
COPY --from=rocm build/ml/backend/ggml/ggml/src/ggml-hip/libggml-hip.so libggml-hip.so
# FROM --platform=linux/arm64 golang:1.23
# COPY --from=jetpack_5 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-jetpack-5.so
# COPY --from=jetpack_6 build/ml/backend/ggml/ggml/src/ggml-cuda/libggml-cuda.so libggml-cuda-jetpack-6.so

103
Makefile
View File

@@ -1,103 +0,0 @@
# top level makefile for Ollama
include make/common-defs.make
# Determine which if any GPU runners we should build
include make/cuda-v11-defs.make
include make/cuda-v12-defs.make
include make/rocm-defs.make
ifeq ($(CUSTOM_CPU_FLAGS),)
ifeq ($(ARCH),amd64)
RUNNER_TARGETS=cpu
endif
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
ifneq ($(CUDA_11_COMPILER),)
RUNNER_TARGETS += cuda_v11
endif
ifneq ($(CUDA_12_COMPILER),)
RUNNER_TARGETS += cuda_v12
endif
endif
else # CUSTOM_CPU_FLAGS is set, we'll build only the latest cuda version detected
ifneq ($(CUDA_12_COMPILER),)
RUNNER_TARGETS += cuda_v12
else ifneq ($(CUDA_11_COMPILER),)
RUNNER_TARGETS += cuda_v11
endif
endif
ifeq ($(OLLAMA_SKIP_ROCM_GENERATE),)
ifneq ($(HIP_COMPILER),)
RUNNER_TARGETS += rocm
endif
endif
all: runners exe
dist: $(addprefix dist_, $(RUNNER_TARGETS)) dist_exe
dist_%:
@$(MAKE) --no-print-directory -f make/Makefile.$* dist
runners: $(RUNNER_TARGETS)
$(RUNNER_TARGETS):
@$(MAKE) --no-print-directory -f make/Makefile.$@
exe dist_exe:
@$(MAKE) --no-print-directory -f make/Makefile.ollama $@
help-sync apply-patches create-patches sync sync-clean:
@$(MAKE) --no-print-directory -f make/Makefile.sync $@
test integration lint:
@$(MAKE) --no-print-directory -f make/Makefile.test $@
clean:
rm -rf $(BUILD_DIR) $(DIST_LIB_DIR) $(OLLAMA_EXE) $(DIST_OLLAMA_EXE)
go clean -cache
help:
@echo "The following make targets will help you build Ollama"
@echo ""
@echo " make all # (default target) Build Ollama llm subprocess runners, and the primary ollama executable"
@echo " make runners # Build Ollama llm subprocess runners; after you may use 'go build .' to build the primary ollama exectuable"
@echo " make <runner> # Build specific runners. Enabled: '$(RUNNER_TARGETS)'"
@echo " make dist # Build the runners and primary ollama executable for distribution"
@echo " make help-sync # Help information on vendor update targets"
@echo " make help-runners # Help information on runner targets"
@echo ""
@echo "The following make targets will help you test Ollama"
@echo ""
@echo " make test # Run unit tests"
@echo " make integration # Run integration tests. You must 'make all' first"
@echo " make lint # Run lint and style tests"
@echo ""
@echo "For more information see 'docs/development.md'"
@echo ""
help-runners:
@echo "The following runners will be built based on discovered GPU libraries: '$(RUNNER_TARGETS)'"
@echo ""
@echo "GPU Runner CPU Flags: '$(GPU_RUNNER_CPU_FLAGS)' (Override with CUSTOM_CPU_FLAGS)"
@echo ""
@echo "# CUDA_PATH sets the location where CUDA toolkits are present"
@echo "CUDA_PATH=$(CUDA_PATH)"
@echo " CUDA_11_PATH=$(CUDA_11_PATH)"
@echo " CUDA_11_COMPILER=$(CUDA_11_COMPILER)"
@echo " CUDA_12_PATH=$(CUDA_12_PATH)"
@echo " CUDA_12_COMPILER=$(CUDA_12_COMPILER)"
@echo ""
@echo "# HIP_PATH sets the location where the ROCm toolkit is present"
@echo "HIP_PATH=$(HIP_PATH)"
@echo " HIP_COMPILER=$(HIP_COMPILER)"
.PHONY: all exe dist help help-sync help-runners test integration lint runners clean $(RUNNER_TARGETS)
# Handy debugging for make variables
print-%:
@echo '$*=$($*)'

46
Makefile2 Normal file
View File

@@ -0,0 +1,46 @@
UPSTREAM=https://github.com/ggerganov/llama.cpp.git
WORKDIR=llama/vendor
FETCH_HEAD=46e3556e01b824e52395fb050b29804b6cff2a7c
all: sync
.PHONY: sync
sync: llama/llama.cpp ml/backend/ggml/ggml
.PHONY: llama/llama.cpp
llama/llama.cpp: llama/vendor/ apply_patches
rsync -arvzc -f "merge $@/.rsync-filter" $< $@
.PHONY: ml/backend/ggml/ggml apply_patches
ml/backend/ggml/ggml: llama/vendor/ggml/ apply_patches
rsync -arvzc -f "merge $@/.rsync-filter" $< $@
PATCHES=$(wildcard llama/patches/*.patch)
.PHONY: apply_patches
.NOTPARALLEL:
apply_patches: $(addsuffix ed, $(PATCHES))
%.patched: %.patch
@if git -c user.name=nobody -c 'user.email=<>' -C $(WORKDIR) am -3 $(realpath $<); then touch $@; else git -C $(WORKDIR) am --abort; exit 1; fi
.PHONY: checkout
checkout: $(WORKDIR)
git -C $(WORKDIR) fetch
git -C $(WORKDIR) checkout -f $(FETCH_HEAD)
$(WORKDIR):
git clone $(UPSTREAM) $(WORKDIR)
.PHONE: format_patches
format_patches: llama/patches
git -C $(WORKDIR) format-patch \
--no-signature \
--no-numbered \
--zero-commit \
-o $(realpath $<) \
$(FETCH_HEAD)
.PHONE: clean
clean: checkout
$(RM) $(addsuffix ed, $(PATCHES))

View File

@@ -539,5 +539,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
### Observability
- [OpenLIT](https://github.com/openlit/openlit) is an OpenTelemetry-native tool for monitoring Ollama Applications & GPUs using traces and metrics.
- [HoneyHive](https://docs.honeyhive.ai/integrations/ollama) is an AI observability and evaluation platform for AI agents. Use HoneyHive to evaluate agent performance, interrogate failures, and monitor quality in production.
- [Langfuse](https://langfuse.com/docs/integrations/ollama) is an open source LLM observability platform that enables teams to collaboratively monitor, evaluate and debug AI applications.
- [HoneyHive](https://docs.honeyhive.ai/integrations/ollama) is an AI observability and evaluation platform for AI agents. Use HoneyHive to evaluate agent performance, interrogate failures, and monitor quality in production.

63
cache/cache.go vendored Normal file
View File

@@ -0,0 +1,63 @@
package cache
import (
"github.com/ollama/ollama/ml"
)
type Options struct {
Position int
}
type Cache interface {
Sub(i int) Cache
Put(ctx ml.Context, key, value ml.Tensor, opts Options) (ml.Tensor, ml.Tensor)
}
type Simple struct {
DType ml.DType
Capacity int
keys, values []ml.Tensor
}
func (c *Simple) Sub(i int) Cache {
if i >= len(c.keys) {
c.keys = append(c.keys, make([]ml.Tensor, i-len(c.keys)+1)...)
c.values = append(c.values, make([]ml.Tensor, i-len(c.values)+1)...)
}
return &Simple{
keys: c.keys[i : i+1],
values: c.values[i : i+1],
Capacity: c.Capacity,
DType: c.DType,
}
}
func (c *Simple) Put(ctx ml.Context, key, value ml.Tensor, opts Options) (ml.Tensor, ml.Tensor) {
if c.keys[0] == nil || c.values[0] == nil {
c.keys[0] = ctx.Zeros(c.DType, int(key.Dim(0)*key.Dim(1))*c.Capacity)
c.values[0] = ctx.Zeros(c.DType, int(value.Dim(0)*value.Dim(1))*c.Capacity)
}
ctx.Forward(key.Copy(ctx, c.keys[0].View(ctx, int(key.Stride(2))*opts.Position, int(key.Dim(0)*key.Dim(1)*key.Dim(2)))))
ctx.Forward(value.Copy(ctx, c.values[0].View(ctx, int(value.Stride(2))*opts.Position, int(value.Dim(0)*value.Dim(1)*value.Dim(2)))))
n := min(c.Capacity, int(key.Dim(2))+opts.Position)
key = c.keys[0].View(ctx, 0,
int(key.Dim(0)), int(key.Stride(1)),
int(key.Dim(1)), int(key.Stride(2)),
n,
)
value = c.values[0].View(ctx, 0,
int(value.Dim(0)), int(value.Stride(1)),
int(value.Dim(1)), int(value.Stride(2)),
n,
)
// TODO shift context if necessary
return key, value
}

View File

@@ -59,7 +59,7 @@ func getModelfileName(cmd *cobra.Command) (string, error) {
_, err = os.Stat(absName)
if err != nil {
return "", err
return filename, err
}
return absName, nil

View File

@@ -279,7 +279,7 @@ func TestGetModelfileName(t *testing.T) {
name: "no modelfile specified, no modelfile exists",
modelfileName: "",
fileExists: false,
expectedName: "",
expectedName: "Modelfile",
expectedErr: os.ErrNotExist,
},
{
@@ -293,7 +293,7 @@ func TestGetModelfileName(t *testing.T) {
name: "modelfile specified, no modelfile exists",
modelfileName: "crazyfile",
fileExists: false,
expectedName: "",
expectedName: "crazyfile",
expectedErr: os.ErrNotExist,
},
{

View File

@@ -9,7 +9,7 @@ import (
"log/slog"
"strings"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)
type ModelParameters struct {
@@ -27,8 +27,8 @@ type AdapterParameters struct {
} `json:"lora_parameters"`
}
func (ModelParameters) KV(t *Tokenizer) llm.KV {
kv := llm.KV{
func (ModelParameters) KV(t *Tokenizer) ggml.KV {
kv := ggml.KV{
"general.file_type": uint32(1),
"general.quantization_version": uint32(2),
"tokenizer.ggml.pre": t.Pre,
@@ -54,7 +54,7 @@ func (ModelParameters) KV(t *Tokenizer) llm.KV {
return kv
}
func (p AdapterParameters) KV() llm.KV {
func (p AdapterParameters) KV() ggml.KV {
var alpha float32
if p.LoraParameters.Alpha == 0 {
alpha = float32(p.Alpha)
@@ -62,7 +62,7 @@ func (p AdapterParameters) KV() llm.KV {
alpha = p.LoraParameters.Alpha
}
kv := llm.KV{
kv := ggml.KV{
"adapter.lora.alpha": alpha,
"adapter.type": "lora",
"general.file_type": uint32(1),
@@ -79,19 +79,19 @@ func (ModelParameters) specialTokenTypes() []string {
}
}
func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
return llm.WriteGGUF(ws, kv, ts)
func (ModelParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
return ggml.WriteGGUF(ws, kv, ts)
}
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
return llm.WriteGGUF(ws, kv, ts)
func (AdapterParameters) writeFile(ws io.WriteSeeker, kv ggml.KV, ts []ggml.Tensor) error {
return ggml.WriteGGUF(ws, kv, ts)
}
type ModelConverter interface {
// KV maps parameters to LLM key-values
KV(*Tokenizer) llm.KV
KV(*Tokenizer) ggml.KV
// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
Tensors([]Tensor) []llm.Tensor
Tensors([]Tensor) []ggml.Tensor
// Replacements returns a list of string pairs to replace in tensor names.
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
Replacements() []string
@@ -99,7 +99,7 @@ type ModelConverter interface {
// specialTokenTypes returns any special token types the model uses
specialTokenTypes() []string
// writeFile writes the model to the provided io.WriteSeeker
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
}
type moreParser interface {
@@ -108,17 +108,17 @@ type moreParser interface {
type AdapterConverter interface {
// KV maps parameters to LLM key-values
KV(llm.KV) llm.KV
KV(ggml.KV) ggml.KV
// Tensors maps input tensors to LLM tensors. Adapter specific modifications can be done here.
Tensors([]Tensor) []llm.Tensor
Tensors([]Tensor) []ggml.Tensor
// Replacements returns a list of string pairs to replace in tensor names.
// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
Replacements() []string
writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
writeFile(io.WriteSeeker, ggml.KV, []ggml.Tensor) error
}
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV ggml.KV) error {
bts, err := fs.ReadFile(fsys, "adapter_config.json")
if err != nil {
return err
@@ -191,10 +191,6 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
conv = &qwen2Model{}
case "BertModel":
conv = &bertModel{}
case "CohereForCausalLM":
conv = &commandrModel{}
case "Cohere2ForCausalLM":
conv = &cohere2Model{}
default:
return errors.New("unsupported architecture")
}

View File

@@ -8,7 +8,7 @@ import (
"slices"
"strings"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)
type bertModel struct {
@@ -85,7 +85,7 @@ func (p *bertModel) parseMore(fsys fs.FS) error {
return nil
}
func (p *bertModel) KV(t *Tokenizer) llm.KV {
func (p *bertModel) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "bert"
kv["bert.attention.causal"] = false
@@ -132,8 +132,8 @@ func (p *bertModel) KV(t *Tokenizer) llm.KV {
return kv
}
func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
func (p *bertModel) Tensors(ts []Tensor) []ggml.Tensor {
var out []ggml.Tensor
for _, t := range ts {
if slices.Contains([]string{
"embeddings.position_ids",
@@ -143,7 +143,7 @@ func (p *bertModel) Tensors(ts []Tensor) []llm.Tensor {
continue
}
out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),

View File

@@ -1,85 +0,0 @@
package convert
import (
"cmp"
"github.com/ollama/ollama/llm"
)
type cohere2Model struct {
ModelParameters
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
HiddenSize uint32 `json:"hidden_size"`
HiddenLayers uint32 `json:"num_hidden_layers"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
LayerNormEPS float32 `json:"layer_norm_eps"`
RopeTheta float32 `json:"rope_theta"`
UseQKNorm bool `json:"use_qk_norm"`
MaxLength uint32 `json:"model_max_length"`
LogitScale float32 `json:"logit_scale"`
NCtx uint32 `json:"n_ctx"`
SlidingWindow uint32 `json:"sliding_window"`
HeadDim uint32 `json:"head_dim"`
RotaryPct float32 `json:"rotary_pct"`
VocabSize uint32 `json:"vocab_size"`
}
var _ ModelConverter = (*cohere2Model)(nil)
func (p *cohere2Model) KV(t *Tokenizer) llm.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "cohere2"
kv["general.name"] = "C4Ai Command R7B"
kv["cohere2.context_length"] = cmp.Or(p.MaxLength, p.MaxPositionEmbeddings, p.NCtx)
kv["cohere2.embedding_length"] = p.HiddenSize
kv["cohere2.block_count"] = p.HiddenLayers
kv["cohere2.feed_forward_length"] = p.IntermediateSize
kv["cohere2.attention.head_count"] = p.NumAttentionHeads
kv["cohere2.attention.head_count_kv"] = p.NumKeyValueHeads
kv["cohere2.attention.key_length"] = p.HeadDim
kv["cohere2.attention.layer_norm_epsilon"] = p.LayerNormEPS
kv["cohere2.attention.sliding_window"] = p.SlidingWindow
kv["cohere2.attention.value_length"] = p.HeadDim
kv["cohere2.max_position_embeddings"] = cmp.Or(p.MaxLength, p.MaxPositionEmbeddings)
kv["cohere2.logit_scale"] = p.LogitScale
kv["cohere2.rope.dimension_count"] = uint32(p.RotaryPct * float32(p.HiddenSize/p.NumAttentionHeads))
kv["cohere2.rope.freq_base"] = p.RopeTheta
kv["cohere2.rope.scaling.type"] = "none"
kv["cohere2.vocab_size"] = p.VocabSize
return kv
}
func (p *cohere2Model) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
for _, t := range ts {
out = append(out, llm.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *cohere2Model) Replacements() []string {
return []string{
"self_attn.q_norm", "attn_q_norm",
"self_attn.k_norm", "attn_k_norm",
"model.layers", "blk",
"input_layernorm", "attn_norm",
"mlp.down_proj", "ffn_down",
"mlp.gate_proj", "ffn_gate",
"mlp.up_proj", "ffn_up",
"self_attn.k_proj", "attn_k",
"self_attn.o_proj", "attn_output",
"self_attn.q_proj", "attn_q",
"self_attn.v_proj", "attn_v",
"model.norm", "output_norm",
"model.embed_tokens", "token_embd",
}
}

View File

@@ -1,76 +0,0 @@
package convert
import (
"cmp"
"github.com/ollama/ollama/llm"
)
type commandrModel struct {
ModelParameters
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
HiddenSize uint32 `json:"hidden_size"`
HiddenLayers uint32 `json:"num_hidden_layers"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
LayerNormEPS float32 `json:"layer_norm_eps"`
RopeTheta float32 `json:"rope_theta"`
UseQKNorm bool `json:"use_qk_norm"`
MaxLength uint32 `json:"model_max_length"`
LogitScale float32 `json:"logit_scale"`
NCtx uint32 `json:"n_ctx"`
}
var _ ModelConverter = (*commandrModel)(nil)
func (p *commandrModel) KV(t *Tokenizer) llm.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "command-r"
kv["general.name"] = "command-r"
kv["command-r.context_length"] = cmp.Or(p.MaxLength, p.MaxPositionEmbeddings, p.NCtx)
kv["command-r.embedding_length"] = p.HiddenSize
kv["command-r.block_count"] = p.HiddenLayers
kv["command-r.feed_forward_length"] = p.IntermediateSize
kv["command-r.attention.head_count"] = p.NumAttentionHeads
kv["command-r.attention.head_count_kv"] = p.NumKeyValueHeads
kv["command-r.attention.layer_norm_epsilon"] = p.LayerNormEPS
kv["command-r.rope.freq_base"] = p.RopeTheta
kv["command-r.max_position_embeddings"] = cmp.Or(p.MaxLength, p.MaxPositionEmbeddings)
kv["command-r.logit_scale"] = p.LogitScale
kv["command-r.rope.scaling.type"] = "none"
return kv
}
func (p *commandrModel) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
for _, t := range ts {
out = append(out, llm.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),
WriterTo: t,
})
}
return out
}
func (p *commandrModel) Replacements() []string {
return []string{
"self_attn.q_norm", "attn_q_norm",
"self_attn.k_norm", "attn_k_norm",
"model.layers", "blk",
"input_layernorm", "attn_norm",
"mlp.down_proj", "ffn_down",
"mlp.gate_proj", "ffn_gate",
"mlp.up_proj", "ffn_up",
"self_attn.k_proj", "attn_k",
"self_attn.o_proj", "attn_output",
"self_attn.q_proj", "attn_q",
"self_attn.v_proj", "attn_v",
"model.norm", "output_norm",
"model.embed_tokens", "token_embd",
}
}

View File

@@ -6,7 +6,7 @@ import (
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)
type gemmaModel struct {
@@ -23,7 +23,7 @@ type gemmaModel struct {
var _ ModelConverter = (*gemmaModel)(nil)
func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
func (p *gemmaModel) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "gemma"
kv["gemma.context_length"] = p.MaxPositionEmbeddings
@@ -42,14 +42,14 @@ func (p *gemmaModel) KV(t *Tokenizer) llm.KV {
return kv
}
func (p *gemmaModel) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
func (p *gemmaModel) Tensors(ts []Tensor) []ggml.Tensor {
var out []ggml.Tensor
for _, t := range ts {
if strings.HasSuffix(t.Name(), "_norm.weight") {
t.SetRepacker(p.addOne)
}
out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),

View File

@@ -1,8 +1,6 @@
package convert
import (
"github.com/ollama/ollama/llm"
)
import "github.com/ollama/ollama/fs/ggml"
type gemma2Model struct {
gemmaModel
@@ -11,7 +9,7 @@ type gemma2Model struct {
FinalLogitSoftcap float32 `json:"final_logit_softcapping"`
}
func (p *gemma2Model) KV(t *Tokenizer) llm.KV {
func (p *gemma2Model) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "gemma2"
kv["gemma2.context_length"] = p.MaxPositionEmbeddings

View File

@@ -6,7 +6,7 @@ import (
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)
type gemma2Adapter struct {
@@ -15,14 +15,14 @@ type gemma2Adapter struct {
var _ AdapterConverter = (*gemma2Adapter)(nil)
func (p *gemma2Adapter) KV(baseKV llm.KV) llm.KV {
func (p *gemma2Adapter) KV(baseKV ggml.KV) ggml.KV {
kv := p.AdapterParameters.KV()
kv["general.architecture"] = "gemma2"
return kv
}
func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
func (p *gemma2Adapter) Tensors(ts []Tensor) []ggml.Tensor {
var out []ggml.Tensor
for _, t := range ts {
shape := t.Shape()
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
@@ -31,7 +31,7 @@ func (p *gemma2Adapter) Tensors(ts []Tensor) []llm.Tensor {
t.SetRepacker(p.repack)
}
out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),

View File

@@ -9,7 +9,7 @@ import (
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)
type llamaModel struct {
@@ -46,7 +46,7 @@ type llamaModel struct {
var _ ModelConverter = (*llamaModel)(nil)
func (p *llamaModel) KV(t *Tokenizer) llm.KV {
func (p *llamaModel) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "llama"
kv["llama.vocab_size"] = p.VocabSize
@@ -120,11 +120,11 @@ func (p *llamaModel) KV(t *Tokenizer) llm.KV {
return kv
}
func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
func (p *llamaModel) Tensors(ts []Tensor) []ggml.Tensor {
var out []ggml.Tensor
if p.RopeScaling.factors != nil {
out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: "rope_freqs.weight",
Kind: 0,
Shape: []uint64{uint64(len(p.RopeScaling.factors))},
@@ -138,7 +138,7 @@ func (p *llamaModel) Tensors(ts []Tensor) []llm.Tensor {
t.SetRepacker(p.repack)
}
out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),

View File

@@ -7,7 +7,7 @@ import (
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)
type llamaAdapter struct {
@@ -18,7 +18,7 @@ type llamaAdapter struct {
var _ AdapterConverter = (*llamaAdapter)(nil)
func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
func (p *llamaAdapter) KV(baseKV ggml.KV) ggml.KV {
kv := p.AdapterParameters.KV()
kv["general.architecture"] = "llama"
kv["llama.attention.head_count"] = baseKV["llama.attention.head_count"]
@@ -29,8 +29,8 @@ func (p *llamaAdapter) KV(baseKV llm.KV) llm.KV {
return kv
}
func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
func (p *llamaAdapter) Tensors(ts []Tensor) []ggml.Tensor {
var out []ggml.Tensor
for _, t := range ts {
shape := t.Shape()
if (strings.HasSuffix(t.Name(), "weight.lora_a") && shape[0] > shape[1]) ||
@@ -41,7 +41,7 @@ func (p *llamaAdapter) Tensors(ts []Tensor) []llm.Tensor {
t.SetRepacker(p.repack)
}
out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: shape,

View File

@@ -6,7 +6,7 @@ import (
"slices"
"strings"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)
type mixtralModel struct {
@@ -15,7 +15,7 @@ type mixtralModel struct {
NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
}
func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
func (p *mixtralModel) KV(t *Tokenizer) ggml.KV {
kv := p.llamaModel.KV(t)
if p.NumLocalExperts > 0 {
@@ -29,7 +29,7 @@ func (p *mixtralModel) KV(t *Tokenizer) llm.KV {
return kv
}
func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
func (p *mixtralModel) Tensors(ts []Tensor) []ggml.Tensor {
oldnew := []string{
"model.layers", "blk",
"w1", "ffn_gate_exps",
@@ -56,10 +56,10 @@ func (p *mixtralModel) Tensors(ts []Tensor) []llm.Tensor {
return true
})
var out []llm.Tensor
var out []ggml.Tensor
for n, e := range experts {
// TODO(mxyng): sanity check experts
out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: n,
Kind: e[0].Kind(),
Shape: append([]uint64{uint64(len(e))}, e[0].Shape()...),

View File

@@ -8,7 +8,7 @@ import (
"strings"
"sync"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)
type phi3Model struct {
@@ -37,7 +37,7 @@ type phi3Model struct {
var _ ModelConverter = (*phi3Model)(nil)
func (p *phi3Model) KV(t *Tokenizer) llm.KV {
func (p *phi3Model) KV(t *Tokenizer) ggml.KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "phi3"
kv["phi3.context_length"] = p.MaxPositionEmbeddings
@@ -68,19 +68,19 @@ func (p *phi3Model) KV(t *Tokenizer) llm.KV {
return kv
}
func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
func (p *phi3Model) Tensors(ts []Tensor) []ggml.Tensor {
var addRopeFactors sync.Once
out := make([]llm.Tensor, 0, len(ts)+2)
out := make([]ggml.Tensor, 0, len(ts)+2)
for _, t := range ts {
if strings.HasPrefix(t.Name(), "blk.0.") {
addRopeFactors.Do(func() {
out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: "rope_factors_long.weight",
Kind: 0,
Shape: []uint64{uint64(len(p.RopeScaling.LongFactor))},
WriterTo: p.RopeScaling.LongFactor,
}, llm.Tensor{
}, ggml.Tensor{
Name: "rope_factors_short.weight",
Kind: 0,
Shape: []uint64{uint64(len(p.RopeScaling.ShortFactor))},
@@ -89,7 +89,7 @@ func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
})
}
out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),

View File

@@ -1,6 +1,7 @@
package convert
import "github.com/ollama/ollama/llm"
import "github.com/ollama/ollama/fs/ggml"
type qwen2Model struct {
ModelParameters
@@ -21,7 +22,7 @@ type qwen2Model struct {
var _ ModelConverter = (*qwen2Model)(nil)
func (q *qwen2Model) KV(t *Tokenizer) llm.KV {
func (q *qwen2Model) KV(t *Tokenizer) ggml.KV {
kv := q.ModelParameters.KV(t)
kv["general.architecture"] = "qwen2"
kv["qwen2.block_count"] = q.HiddenLayers
@@ -45,10 +46,10 @@ func (q *qwen2Model) KV(t *Tokenizer) llm.KV {
return kv
}
func (q *qwen2Model) Tensors(ts []Tensor) []llm.Tensor {
var out []llm.Tensor
func (q *qwen2Model) Tensors(ts []Tensor) []ggml.Tensor {
var out []ggml.Tensor
for _, t := range ts {
out = append(out, llm.Tensor{
out = append(out, ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),

View File

@@ -20,7 +20,7 @@ import (
"golang.org/x/exp/maps"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/fs/ggml"
)
type tensorData struct {
@@ -29,9 +29,7 @@ type tensorData struct {
Shape []int `json:"shape"`
}
var generate string
func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
func convertFull(t *testing.T, fsys fs.FS) (*os.File, ggml.KV, ggml.Tensors) {
t.Helper()
f, err := os.CreateTemp(t.TempDir(), "f16")
@@ -50,7 +48,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
}
t.Cleanup(func() { r.Close() })
m, _, err := llm.DecodeGGML(r, math.MaxInt)
m, _, err := ggml.Decode(r, math.MaxInt)
if err != nil {
t.Fatal(err)
}
@@ -62,7 +60,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, *llm.Tensors) {
return r, m.KV(), m.Tensors()
}
func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors *llm.Tensors) map[string]string {
func generateResultsJSON(t *testing.T, f *os.File, kv ggml.KV, tensors ggml.Tensors) map[string]string {
actual := make(map[string]string)
for k, v := range kv {
if s, ok := v.(json.Marshaler); !ok {
@@ -77,7 +75,7 @@ func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors *llm.Tenso
}
}
for _, tensor := range tensors.Items {
for _, tensor := range tensors.Items() {
sha256sum := sha256.New()
sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
if _, err := io.Copy(sha256sum, sr); err != nil {
@@ -93,7 +91,6 @@ func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors *llm.Tenso
func TestMain(m *testing.M) {
var level slog.Level
flag.TextVar(&level, "level", slog.LevelInfo, "log level")
flag.StringVar(&generate, "generate", "", "generate model data")
flag.Parse()
slog.SetLogLoggerLevel(level)
os.Exit(m.Run())
@@ -112,8 +109,6 @@ func TestConvertModel(t *testing.T) {
"all-MiniLM-L6-v2",
"gemma-2-9b-it",
"Qwen2.5-0.5B-Instruct",
"c4ai-command-r-v01",
"c4ai-command-r7b-12-2024",
}
for i := range cases {
@@ -131,19 +126,6 @@ func TestConvertModel(t *testing.T) {
f, kv, tensors := convertFull(t, os.DirFS(p))
actual := generateResultsJSON(t, f, kv, tensors)
if generate != "" && generate == tt {
outFile := filepath.Join("testdata", fmt.Sprintf("%s.json", tt))
data, err := json.MarshalIndent(actual, "", " ")
if err != nil {
t.Fatal(err)
}
if err := os.WriteFile(outFile, data, 0o644); err != nil {
t.Fatal(err)
}
t.Logf("Generated expected results for %s", tt)
return
}
expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
if err != nil {
t.Fatal(err)
@@ -349,7 +331,7 @@ func TestConvertAdapter(t *testing.T) {
}
defer r.Close()
m, _, err := llm.DecodeGGML(r, math.MaxInt)
m, _, err := ggml.Decode(r, math.MaxInt)
if err != nil {
t.Fatal(err)
}

View File

@@ -1,344 +0,0 @@
{
"general.architecture": "command-r",
"general.name": "command-r",
"command-r.attention.head_count": "64",
"command-r.attention.head_count_kv": "64",
"command-r.attention.layer_norm_epsilon": "1e-05",
"command-r.block_count": "40",
"command-r.context_length": "131072",
"command-r.embedding_length": "8192",
"command-r.feed_forward_length": "22528",
"command-r.logit_scale": "0.0625",
"command-r.rope.freq_base": "8e+06",
"command-r.rope.scaling.type": "none",
"tokenizer.ggml.add_bos_token": "true",
"tokenizer.ggml.add_eos_token": "false",
"tokenizer.ggml.bos_token_id": "5",
"tokenizer.ggml.eos_token_id": "255001",
"tokenizer.ggml.merges": "902a060cac8884a5793d2a857dd2e53a259de46c8d08c4deb243c239671e1350",
"tokenizer.ggml.model": "gpt2",
"tokenizer.ggml.padding_token_id": "0",
"tokenizer.ggml.token_type": "b7a352ccd1c99d4413bcf452c2db707b0526d0e1216616b865560fab80296462",
"tokenizer.ggml.tokens": "815ac90ff23565081522d7258f46648c8a0619eb847a9c7c31b238a9b984e4ae",
"blk.0.attn_k.weight": "6fcfdb466f9ceb1229404ce4ec4e480751b8d00da12707a11783dad7256cb864",
"blk.0.attn_norm.weight": "6063317f731371864049c7704a70772f1eb632194201ebdc2ed0f8e483507c72",
"blk.0.attn_output.weight": "920f49716a1e2fc73b6794ec777947f1c122701e63ed302422ac89e90f06e9da",
"blk.0.attn_q.weight": "ddbcd7cde197e632564ac58e4f25d9e3a8ca52917329eeb6081eb41a797932ab",
"blk.0.attn_v.weight": "318fc02a189d87420f0cbf57f47f11e00c21ec1ed472ce0a2a895b44f7fa0fca",
"blk.0.ffn_down.weight": "aa71975b6eb1f4c77b03d2ac4a194cf8d95718efac741bb12f0f3ff79a27f9bc",
"blk.0.ffn_gate.weight": "42967702fa0bc738b88dc50007ace26dbe74a5a9e0978124dd093f818241a9e1",
"blk.0.ffn_up.weight": "5282c8788b086bd30f46525e7995a17464882a72703fd27165491afdd8bfd4af",
"blk.1.attn_k.weight": "cd248882e64fd2c3402c44790ebe12440133dc671b6893fdad0564c461973adc",
"blk.1.attn_norm.weight": "ba84e1c8fd30af6ec94208db4078befac8c921aad3acb887812887f3282ea2be",
"blk.1.attn_output.weight": "2efa3ef7c5666ccceb05e339b83ad680cc0d2c3ec78203f5da5959f23a80e14f",
"blk.1.attn_q.weight": "5106f2e255358a1303c22e8b5f0ec044852bb30a866c52cabefd30017a7a6b7d",
"blk.1.attn_v.weight": "a211a634a1a5df1d5f973645438be0461dd922210f9747c6b04e386c7f1ebe95",
"blk.1.ffn_down.weight": "37093afe48d32c578ec956c9ed85242cd000d6aa979e60526aafa10c822dbb10",
"blk.1.ffn_gate.weight": "469860819e9159caefb1aad0bc66db790f3393f05fd87b08e52256a7ed256543",
"blk.1.ffn_up.weight": "736742c97d35d1a011f9cafd3c0ce947ad559bb2fba6da73c816f6bfd0fa9aeb",
"blk.2.attn_k.weight": "92c219d92804d832ab404bd6dc7339c90877bb7cf405dd030c121f8b27757739",
"blk.2.attn_norm.weight": "61e4466069474b76b6d1e702566420eb669faf3556b00ff7b824784aca13a2d6",
"blk.2.attn_output.weight": "d2fb38a2b2171fd91caf037faa585a62225819aa232d86fd4f7f9d2c3c8a45e9",
"blk.2.attn_q.weight": "f6faf5cc6844e3daa4f9f68d90f5458c64879de68a7728860e38374e30c3429d",
"blk.2.attn_v.weight": "f340ef8f7341d987a6f37c0e9afe0aef5be67be00c0ce5f57612daf73319cce1",
"blk.2.ffn_down.weight": "c7be61a701d779860b621b143fb6365b607bf99ec7c0f153b07908ac8120885a",
"blk.2.ffn_gate.weight": "b64f0878187bd3392abfa4c3e8ad2f8b4c133903e54246747ff8f3b4639ad83e",
"blk.2.ffn_up.weight": "50b11c712652e90ee7428dbb45cffebb80662ac982bc72bd9eafff361b5eb5a8",
"blk.3.attn_k.weight": "2b7bcbe9ee5c9c630c8c8d7483887e78b73581016f4cbb6933db2a147a25f431",
"blk.3.attn_norm.weight": "0181dac7f4eee7252980323e8032cf339bef2046ce0a16c0fd72af7c98a8a37b",
"blk.3.attn_output.weight": "aef8843b636ce231da9e7c9acbee197883cc15df0e2887709324c6a50f16da7b",
"blk.3.attn_q.weight": "55404130fa10e81322d33eb378aa0de31a92990ce7730f1338c0ace0406bb1b1",
"blk.3.attn_v.weight": "76f7fb8040d82b957d689ce34fea2302a6640ad5bbaa0052ad2b7ebce270c33d",
"blk.3.ffn_down.weight": "648628933eff3b357c3729c33c5b1ae51c28e59b9c19acd1601a2ff7c5d5d9a5",
"blk.3.ffn_gate.weight": "6a588885d16e98d5f50ebed05af089154f680085ca9c97691e5b489088630a4a",
"blk.3.ffn_up.weight": "e12455a1d702f4986e1a663493e3d5102b367af74d45557522002a35d63ecac2",
"blk.4.attn_k.weight": "40d943380a8a85e4eab147934bf6e16f23cc8ab753f6636526382c074d182288",
"blk.4.attn_norm.weight": "4ab2c098983d4599fe540eef624c4df954adb7473faebda7471ef0ba4134814c",
"blk.4.attn_output.weight": "d14b91e40f58bf4a3c8c2eca0b12bb541de406574af39027d56f6c588a147082",
"blk.4.attn_q.weight": "e1224960a3562107488589f883fa32414bae41712fa8dbd47c5f3e3a7801452f",
"blk.4.attn_v.weight": "063f297bc4aa6e709fc32c4c32e35af7d07d80e83cb939b76adbba858006c03d",
"blk.4.ffn_down.weight": "f88a18020c5e1caaa29596895eb348e76ee5bfad27ed57651a86cd8cd1f9b5aa",
"blk.4.ffn_gate.weight": "48e7e1eed3fb52e92e61d3557dd0ec002418327090e034ce4322fd68542266f8",
"blk.4.ffn_up.weight": "1ca8a7aa17355b6ce0d9ad5539fdad3899fa47fd359c285fbfb31f19f47bf073",
"blk.5.attn_k.weight": "2bdf15f8e73d068d972380f25d207004cf0bf3b5bfa46946803ba6fba07d9175",
"blk.5.attn_norm.weight": "60448d7cde6e1b6467aa31bdea012e39cdb08c88081cee7d102dca4f93f766ef",
"blk.5.attn_output.weight": "f9f687d7c457537f9fca8a4087a59f1c3bebfaf5537b94e42c831a13224f7799",
"blk.5.attn_q.weight": "987db7a2ad68657a92625e1980effbb1f79697c2183f2b9f3b3a0570c51b0ab9",
"blk.5.attn_v.weight": "cf696891148f3e4783ad1d20f93462ae091eb8651c656bba9b662253b6263e02",
"blk.5.ffn_down.weight": "c0662b0bd0929136005fb9d691fdd9b2c33867d9ce9622339a6a456b720b059a",
"blk.5.ffn_gate.weight": "200bbdfab615d7a3a84719b6ced7751e3ce52757ef212d96f87798bc1de5e987",
"blk.5.ffn_up.weight": "df5d23e7e035fb1b9d163da7ddfdfe38da6a37e86e96534dc02ad20f011b55b3",
"blk.6.attn_k.weight": "c0dae2d272a7c5a2fa004bbb8475dbab362fc1f6d008e73d5a4434a9382ac6ba",
"blk.6.attn_norm.weight": "51c57ac8b55e04354d5dca6bb9c0cf4177639d3b038e80209e33036209688f64",
"blk.6.attn_output.weight": "229d97892c62f85bcdf431675250e01c976ad69ffa450b01fb543bf88f14a2fb",
"blk.6.attn_q.weight": "c20e49621821bd46ed156e6823864a5bda4f317750e71ab8dc54e44eb48cf7c2",
"blk.6.attn_v.weight": "53ceb1a2ee43fce3c7b5b33c58a9fc5ee7f44dc1c6f29bc9dbefc37582102dc9",
"blk.6.ffn_down.weight": "7923c943b7629d560a032d1efa210d1d75c6692140f1be94464ee7ed24f44ed0",
"blk.6.ffn_gate.weight": "57593d350361af753a6a39f53b066282634c0fb44f396f6f2966a574b01d8f8c",
"blk.6.ffn_up.weight": "327b6a7a387098b8899d3ded04a4d4e7c658ca61b80d4e7b17594be232721602",
"blk.7.attn_k.weight": "9ca48b87a10116fd8868e62b76f211d4bb91f166096be9061439ee2e1c3a5c20",
"blk.7.attn_norm.weight": "cd56cfcc4e2ad6b96e23ea7b0d32b4caf236107d99a0b22c56760b62e63c8cfd",
"blk.7.attn_output.weight": "7352b509a03cae2491ffc060e577d189341a0f861233f18c96f9d275dc4234bf",
"blk.7.attn_q.weight": "2b3791c8c008c33ddbe12bedba8191322ceea2dcce5cf0eb7a93d40ad254e672",
"blk.7.attn_v.weight": "3ae721d52466487a3d48150581e57f6d64ea1e83ab929f23b28c3d777422eeb6",
"blk.7.ffn_down.weight": "3b6fa8ececdb3c34af3a5363863d6f94289c1c95bf47fce3a3ddcf184c5f0848",
"blk.7.ffn_gate.weight": "dbd7df6c5ae5eb4adb859f0d36453813a4e289a359a1ba8f72d67fcbf21c3e22",
"blk.7.ffn_up.weight": "de68380a334b4c5cfd4c318b0e9854aec59bd79aa0f0c30af3f56414f83482b0",
"blk.8.attn_k.weight": "7303c4e4480abc72a7ee271811311199245fb5c2ea27a2bd3b8cad3a53a03c27",
"blk.8.attn_norm.weight": "2e3d1921898d1b943ce1a1b6818546c8b471d6d542da24f51a8b514b8c3dd4ef",
"blk.8.attn_output.weight": "30421520887b66bf97a18dbcdc283bc8d0b60590b612fd638a319a6eae923227",
"blk.8.attn_q.weight": "73e064d5433c9b500068a1c31744dbd53f4ade298fb450a0e8c97f62cf1f8a8d",
"blk.8.attn_v.weight": "27e21f8b9a9a8533e8178ca34a72aa1d786393d57302b7806dcdf3e51de511a8",
"blk.8.ffn_down.weight": "bf694bd8e00047982108000e7b3dee7b225db8b19abc595e5697b6bbefd92e7c",
"blk.8.ffn_gate.weight": "d55fdbf8606d9141b774b0500c58944fd1253b9e69d1f765eaa9a680b9f2ca40",
"blk.8.ffn_up.weight": "1ae3f580655e7c8e8dd6c34fa4ac574fdfc5e3f1a8536da0c5442d3a2976f0e7",
"blk.9.attn_k.weight": "b18080626012d8aabcf78542d6c7bf31c712bf55a70172fbfe173fcf34481036",
"blk.9.attn_norm.weight": "2e3620620dc09998c6d3063a7d5de5433fbbae8c11e5b00d13f145d39140e162",
"blk.9.attn_output.weight": "69c3c0e27ef1c0fc933eeb7b612b70909f18cde238873c0d576a2ba9714ef174",
"blk.9.attn_q.weight": "68330e5aa28a28873c9a6e67f032186ef651df2df5844e0f27094ba349fbe4ab",
"blk.9.attn_v.weight": "3df8d45a102be082d0793a51cb82aa62a43cd0e9d047ba4115ca0f2414b39325",
"blk.9.ffn_down.weight": "1d6cc162b73745b135b4f040a0aac3c06d5135a3dc5b2421e7ee2af48662fd7f",
"blk.9.ffn_gate.weight": "034a9d40fb1e32b534b45f4bccd65cbe43c4a6a3f5d01132bd245ca0005de5fc",
"blk.9.ffn_up.weight": "c838c38d0e1a0ac0da17eb2a66023ed31929f07d8fcfe1cc546df26096c91f0c",
"blk.10.attn_k.weight": "a78507cb72f744b86ceaa032596e74e5571c822d0226d334881169addb32cbd5",
"blk.10.attn_norm.weight": "35f48d0b28ee0e6b4cad4e983925737562d64824be5b168b3e26df3d6b260cf1",
"blk.10.attn_output.weight": "53712db06796de39b131323e7abf9a58551b6d52da6db66a471580386d396252",
"blk.10.attn_q.weight": "efe08429ba196026b81cd1c471e1c7418afd9e966659feb3936b674aa0803b58",
"blk.10.attn_v.weight": "7ec6055e134f89da0cbe79ec9f13ef2e442ac584b1f03c3e13e7d0cdad0078bd",
"blk.10.ffn_down.weight": "37e66af4bcd1f3079e841e892255b8255070655901864ea3a8c602a7f681a640",
"blk.10.ffn_gate.weight": "1825282bc34830d371c6edcc3c1e73e6ecc1e10f4aea0122dbb7acc1d6f7b1bc",
"blk.10.ffn_up.weight": "819b3b276a4d4c14a35ed6682d5ef18a5e8ed468e5ce3f12e8c75ec18ac20ec4",
"blk.11.attn_k.weight": "5327e6a2af82dfff0619a14971f5864a15553c36fead84e1af42c7630f2729c6",
"blk.11.attn_norm.weight": "fec363b3c4a43036d2c635fb8aa9e122dd87ee79811839f2f6cd955be3373e7b",
"blk.11.attn_output.weight": "ccf7b38f18ee8798b8a6a35018e2df3eb3e007de62876befb68025dd66c79763",
"blk.11.attn_q.weight": "da8c4a1c824ffe174e39f126cd72f7ef83c56aff1259d452a1212de80f98f5e9",
"blk.11.attn_v.weight": "d17ae6bb77f03982b55d341eb67acb5969e9ad3da5994b96eafc09793dcfe3a0",
"blk.11.ffn_down.weight": "a6bac521e2791345f22c57205fa1c2f2f687794dfd24d0e98d50ae0d0eb6088a",
"blk.11.ffn_gate.weight": "5ed902c488cb51ba5635f3df08258c5f84f31a679a00211ea5f9d8b824ef6d9d",
"blk.11.ffn_up.weight": "ee9f1437eb890d2cf9df2574afa1cecf20aafdd847cd75b152d7eb74419afd34",
"blk.12.attn_k.weight": "5a069c06e1019b0f889088e67458f7a11ec77fa190ada6069e46211f62219947",
"blk.12.attn_norm.weight": "194d7e5fcc8c49aea62daf1940532419cf3c505afdce6be377286b677db5db8f",
"blk.12.attn_output.weight": "6534995fd4d6fecb55e317add4b1723aba4d825e1e9471d0b08813dfdc247176",
"blk.12.attn_q.weight": "4ab51ca519b5995581fa34f846276feca3b907ef2b51f192f6cc0b3263c3f5a2",
"blk.12.attn_v.weight": "5652ca3fa81ef9a1ac1543d71fc6813f8517f8ec54b25c701f6f98061614830f",
"blk.12.ffn_down.weight": "4b2c263f54c88516b8eb273bb8d9615b01c5c8b484dc70358adb91b50b300edd",
"blk.12.ffn_gate.weight": "8f50c3c3e3e8568991d6c1b0e74b500cf4f208e7700bbb8e87c3f6a6d359b6b5",
"blk.12.ffn_up.weight": "1c1a581fec1fbe959e1427fa513f400100b5e1ee9d83932630be9905fb49c231",
"blk.13.attn_k.weight": "efd7a38c46f08d8376d82974f33c644e3a02220e142d63b1704718699a8a884c",
"blk.13.attn_norm.weight": "d28fa4f1bd75abbd063b0e622e08f579c89cd0c0c5ce63c1952ec9f944f8ee13",
"blk.13.attn_output.weight": "71e0068a639288718bdb70a6cfdefd50bc8b3ec3993347a65129e70001ca5827",
"blk.13.attn_q.weight": "b97077adc92cff07a2e07d80ee38f214ad8713571c69cd5c70ebd43dc501ac87",
"blk.13.attn_v.weight": "79b3e2749ab4b459c81e96e322b215f1e8af645eb346e176c326bd00cf6ed2fd",
"blk.13.ffn_down.weight": "9f8687d11effa1db7cfecf7bec5631734bcf2962aad74a9f519144491e08ec85",
"blk.13.ffn_gate.weight": "7d14dfa0543852e7777fe8fff29ca533744cbcf1ebcf10067e5adfc4eb345e65",
"blk.13.ffn_up.weight": "852b9527b97fdab211ff3f832a660ee1d93ccb56906144c50f01319a6e8ee615",
"blk.14.attn_k.weight": "79e926b20f36f66d58226cb358881f2f68ae7b468787d33cafae5110287a14a0",
"blk.14.attn_norm.weight": "97d481b63deb0df6142c2c6cd23043720c62eb609e390f47a7113751c79974ec",
"blk.14.attn_output.weight": "aa6e94d7176d5c79fbb89b96e5f13ce75702ce3dd23ee52986446da436a6c3d6",
"blk.14.attn_q.weight": "214becb6d1bb460da9fb8ace0f99b9a5afa9edf7aa7acc19606c7401b11d6305",
"blk.14.attn_v.weight": "488b0e6d7f1a7a2ed0972aaa6d10ef9c775ee5373460324efcf5b3e3da9311df",
"blk.14.ffn_down.weight": "29c7ad16cf9542e30996a1a01ab95b844533b28051f04cc7949c371afb796471",
"blk.14.ffn_gate.weight": "b7ef208f2b054803665b377f5a5980c122c026841809cf855c6ba06d1c3a885a",
"blk.14.ffn_up.weight": "76a5cc28100748d79c4398ce7b9176aab4d661548b6293a82f99144812e5b70e",
"blk.15.attn_k.weight": "a6b8f9e98ab878fa7ebc5d080978ebf2d050acc2ab2fa8ea9188eb10e27702c8",
"blk.15.attn_norm.weight": "a26d07a9752d6dccb68e3a8a2a49fd0752cdd0a415e05547819bc37d9ba63d5e",
"blk.15.attn_output.weight": "c63616c69048ccbee801e05be4f56d21fda21aa0cc470f41d57c31b4d9283a4d",
"blk.15.attn_q.weight": "fd595a67bf96c6ba16eb148a9d02fa52fa3c1d33ed10be28a08f851409fd6e64",
"blk.15.attn_v.weight": "1c5c9d33fa07c05d5f4ed0032c6c4aa83d863f0d31c94a66109d239dcd03cea3",
"blk.15.ffn_down.weight": "585ea62ab8aff7d7d212ea5c1a03226fda6b68370c890b776834af70c948dcbc",
"blk.15.ffn_gate.weight": "a13c63f86f879b03a573d5dd2a25cfd1f4dc73e8132e6454ecc23e538b4cdf6f",
"blk.15.ffn_up.weight": "f7112450f57c12fcd511f049e0dc0b541625a107a7901c3261ed9e984299f65c",
"blk.16.attn_k.weight": "2d2c8b11dd71fba6d1c106aa1673c113a5448653cca7eab897c8739212ed5003",
"blk.16.attn_norm.weight": "95c2ec7be9469690e18a9a1779684acb3e9da44b13e263a0da840305646fbf8a",
"blk.16.attn_output.weight": "31a65046e677f54dae654ded4e733479fcc0f7283d83076b7dc7cbcae8528230",
"blk.16.attn_q.weight": "bfc6292b9c6d49b7118d08060242a138182eb182d136ba5dfaf469437c16081d",
"blk.16.attn_v.weight": "68f81d037340217d87c7853ff4d6edfbc46d9e827ee6d5bff7c3f6238e3a95ad",
"blk.16.ffn_down.weight": "bbd6629691950cef4d5113e1c6670e91b216a9b872cb92cee02dfda4d6c4f7b8",
"blk.16.ffn_gate.weight": "63cb56f282b7401ed6c76e5bb6fdf1bf68a64f9af0c82c014209b55bcb5191d0",
"blk.16.ffn_up.weight": "b54f39a2541063cbfb6f713aa81c3b69a04100e999aa2ebbeec195dc382eceec",
"blk.17.attn_k.weight": "3d9ba49799cc56664ec30a002bcad61eb651294212a68c3ddb573eb042aef5a4",
"blk.17.attn_norm.weight": "42ee0db4b9d63257bca0012a30b12737ead1caafeb5ed3d93c8f48ffec4b46de",
"blk.17.attn_output.weight": "a38fd100f05c9041c592bc739e287de0b10d08ef2bda41a879225bdca9002f71",
"blk.17.attn_q.weight": "8a3bee285b0180a9eb35662e449ee4cbe16d992bdd48fb3a94bc4a347728cfa2",
"blk.17.attn_v.weight": "d7f8f1b8b863494ed4392a1656775912e9b264ad36016547b12e832a1d6757d6",
"blk.17.ffn_down.weight": "bb7ee58f61da8630972e25b621996fbe8ec06f4dc9ab1e268ab5b120c526ca28",
"blk.17.ffn_gate.weight": "6b652dbf167fee09a45ebfd78d500ff6548fb2756dbe5343ffec3f7e6207179f",
"blk.17.ffn_up.weight": "3b67f727e55e742715de978fab80457781e7a3762bc48f79d13b45dcb8de664c",
"blk.18.attn_k.weight": "ff7fe57c57b90c6fcc0aefc39ec24593c3a7d1ea1c23770480075a015450e0f5",
"blk.18.attn_norm.weight": "1d40faca082d2633ef0ccf19e121870dd6c7c3e2154607c7f3543fa96e99cb2d",
"blk.18.attn_output.weight": "9adfecaaa397a92db4687efd5fcabfa0daef9e6b0493763b7ff5ebc185c43a6c",
"blk.18.attn_q.weight": "ad1803eb9b291948639277afe981e666b07167eb3fcae903ba5b73bf86d8f50b",
"blk.18.attn_v.weight": "308cf23399adccf27401a4ab60d74dac6fb9d4cd4b9c5940d9145118d1881b34",
"blk.18.ffn_down.weight": "7de4ac9a561fb580619b745687dfd7ca8a69ef70471dee978741b80e9ff7bead",
"blk.18.ffn_gate.weight": "0c66970f696b33bd5ee8f1f2fbcb41fd78fa5ccabdc927e11a4d5a4089f19c69",
"blk.18.ffn_up.weight": "66a42e988e8a1f468fabf976c48e9e4bb045eaac6916ef16555ac101cd674abc",
"blk.19.attn_k.weight": "a928ab50390bacbcebe2e4b66922498134ce22d7b93beaa87d6cf4ab52eb7174",
"blk.19.attn_norm.weight": "b4a02c55b46c2a96aec9c64a254087cf48e6c1d4b6f31782c77a46fc4daebad1",
"blk.19.attn_output.weight": "b768319c641dff1eac5d1f8ceb960c9899c795bf2b24c1d6bf70aa24fda45f77",
"blk.19.attn_q.weight": "79ef3f57d187d3954a26362096e1b6c222d76f537dff73e034d6e9999935b8bc",
"blk.19.attn_v.weight": "ce13d6b13e24fcb2d5bc6a2662e5bd295b31b12db10a6d0307f86cf29b8d5001",
"blk.19.ffn_down.weight": "cf90d7e2137482cfd50934a8223ad774621d08554969da80a9712df5e6227eb0",
"blk.19.ffn_gate.weight": "71ce30150f003b6eeb3bf7464e05b6ae615f135110d8e47f0a47fd973e537c0f",
"blk.19.ffn_up.weight": "7f92aca0cc29866633feec701ec01a85a8ee2fd4e2b9630173a6cffb1d9d50ee",
"blk.20.attn_k.weight": "a2df23159d6fb74ef28e14b61028fe8b00a693a2fc9234a980be74f20b958682",
"blk.20.attn_norm.weight": "c6cd5f1b096fc5efa4eb59ca1c8c4bd28730f3dcedd59a63601663eccc6724ed",
"blk.20.attn_output.weight": "896a8a166d0f006d4b09867ae4345426303cbc3fb13a18d3d4e1bde00f16dbdf",
"blk.20.attn_q.weight": "01eb79588fe61baea0da43e99f4dc5939590e1bafd01e12dadb8326f102bfea2",
"blk.20.attn_v.weight": "bd39630fdd5a7c859ac1addaf53e63faf524c3f32f5f4896d86b6e746b1d5c06",
"blk.20.ffn_down.weight": "0304a5d39957a0e3f031c4bcc4549a135d396c8d97c8d276fd1c823ce86560c2",
"blk.20.ffn_gate.weight": "117b79d595b1dca0c8b37586beaecc4d84411507276212dc286cde7fc36c9bef",
"blk.20.ffn_up.weight": "6e799346db145c125f01783539749d3828fcc451cd4f10c5352f047a47e28714",
"blk.21.attn_k.weight": "1c37e4c0664147e775bb006b226b9553e3421140cd96288ea755f81731ab80ba",
"blk.21.attn_norm.weight": "00ae783a29000ccda5e4bdbff03df0752fb82805dc3f9b987500ebd80714476e",
"blk.21.attn_output.weight": "7588b84f9fb19f15095b5265c60b4a4e7ae74bcc47d4607dfa5d0bfab6f136cb",
"blk.21.attn_q.weight": "a65f1c0dd06d45bb97532d3e932689c1eecfe7359089b39174a96a149335cbc1",
"blk.21.attn_v.weight": "4220b77e7d5e8709b4eef33a679b5dad11f297085ef44c9977f9e54ef08f7a2d",
"blk.21.ffn_down.weight": "b8c082a0530d4b5328e67db0df84c5498f2af956de23c639fa0198ffea853950",
"blk.21.ffn_gate.weight": "cd1b656ee72d00e9835ef667c19ef89a88de261eb8eb7c0e936e0f9ddf83ef9f",
"blk.21.ffn_up.weight": "dc445f73e36ec7a3bd86884186b728f8e0187f32848c3b8b69d4d41f8571bf31",
"blk.22.attn_k.weight": "e37cf0b893ec8b9ee8c78dd139b8d9c45cb997a3bc0c3d93a70ca1c3f6af8859",
"blk.22.attn_norm.weight": "248a27838d3c46cc03a5c312facc84e2e0e2c990ef8401e93da25918497f88d1",
"blk.22.attn_output.weight": "fc191a18f6d18332c66761f7ab28008bfe295dd1f5c8741a2488442f9e00d0f5",
"blk.22.attn_q.weight": "4b193a2ab8bc2b085db18f2bf3eeba26e02b537b2cdd738160c8f14b165d0f5a",
"blk.22.attn_v.weight": "7a60ce5ccac7e045e55ba1e1e85bd2a0f93f8c781daee96c5223665e22f0c666",
"blk.22.ffn_down.weight": "e0a34fb4244e2c7168f3dbaa1904c15d339ec39999cdf27128bbaf619ee0a237",
"blk.22.ffn_gate.weight": "8bac872d4b8549c8812f927efa309f1792b524f33601095fff61b826de5a5615",
"blk.22.ffn_up.weight": "b67fa2b94dd901b6ec64c0853ce8ca2d86fe9cb1cc6d2f15fbbbe0e691c0c648",
"blk.23.attn_k.weight": "2c32e66ad01942b819ac09a197c71579fe66f02226a264fdd72ad1e02c67a27e",
"blk.23.attn_norm.weight": "825fdc94deb439cb93c713eeb077c1052b90ed658d6d464fc4ad3d611e911d48",
"blk.23.attn_output.weight": "95ca6707a95b8750b0c7c5d379d368f0f2e7ebef631954e7d4d8ec0f41f13a3a",
"blk.23.attn_q.weight": "6eccc84faca5fac015d1b26e2854501edcfd292a302228fe14cf99f5eb59a34b",
"blk.23.attn_v.weight": "b343ac3d226040f1033ee049668aa1d89b1774bc18431965682e5dbdce78ccdc",
"blk.23.ffn_down.weight": "9fc599befea8d3b1e342d564a110074f66d2542df406c4b90b6bdc5828fbb2b2",
"blk.23.ffn_gate.weight": "488556c1b0c9f0b20b0c99b4bac2e0f4046b81edb601d7b91e7e5b3bab47d667",
"blk.23.ffn_up.weight": "1088e291d7008dd9c7c2dd6830af686a8a84b724d123a016209bd5156d6898f1",
"blk.24.attn_k.weight": "a923fbe35e61e009a53927d7828818e0592bb737d6a1106c4b0b5a1efc367e07",
"blk.24.attn_norm.weight": "9b51aaaa939cefafdd9b13a7e5b74ac7fa2d603427e55a16a909d6f3f353750a",
"blk.24.attn_output.weight": "1beb2baba56f8409466434b037771248c2f620ec5f53e15f44c271d5a2d9ecf4",
"blk.24.attn_q.weight": "4b0194fe5bfae0c6bf6131dcf8cb6e2b994f6ea10b27cb03574f0f4f8cc0c950",
"blk.24.attn_v.weight": "6ac34b1ab0f66226d85bca1194a7c212cd93d384ecbc8b8395de48aec0970a61",
"blk.24.ffn_down.weight": "5508f74cb732a662c2936b32ac5e90742d172b9f961a747b0e5cba0e5906a89d",
"blk.24.ffn_gate.weight": "095e39b8584403835f9bb1ac33e0e81f54175575e4800273d281b845bff381e7",
"blk.24.ffn_up.weight": "2d43ec21637dda12973de367b0113ee9840b0d815bf6fce042f7c3f270b0b530",
"blk.25.attn_k.weight": "9e2aee029f3d2c7f67dfc7926e72c8228fb978382c8e5a4701bbf82c93801419",
"blk.25.attn_norm.weight": "220cd7164fb4cdbe22d26058e4153b26c27c7b5ce2bec8e95bf2c0ea08d23103",
"blk.25.attn_output.weight": "a17f4a5dc6aa51f03dbd75602d98e9491767c205cdc2c3a5f8667fc54bbf7c64",
"blk.25.attn_q.weight": "f60827496835c440c794bf57ce9780704d10a59d8229886bf75ebb18900ba4ef",
"blk.25.attn_v.weight": "9cac217e9e9f4f4c85f14ee51165a77c580165bd4a34b202389169bbe61a1ced",
"blk.25.ffn_down.weight": "a0f36949b663e80849581dfb71e7babcc73580793bbcb0c80ab26d5a6e000359",
"blk.25.ffn_gate.weight": "df4d1be4d50d6afe5ad3ef0d0e0fac76a33e85c963dea769641d612dd53e7d13",
"blk.25.ffn_up.weight": "992da76be762632e25ebc5ef4d03728eece1b43f7c4e31827df19ca724aea694",
"blk.26.attn_k.weight": "34199ff856ac32a500c754539d070258574192a34ecba87a182897cb59fdff52",
"blk.26.attn_norm.weight": "a8e9dfb2dae5d22b5c0aec5f3675991c0e3c3e6a44153db2579136b73f456e00",
"blk.26.attn_output.weight": "1c4f257ffb0d7db0f11cfb275e38b4af736917b43ad82de1badce3f1d227da4d",
"blk.26.attn_q.weight": "33d55786274c2e718cf61e8fbecf3dfa5ee0c208f0b716d42b061f55459acb3c",
"blk.26.attn_v.weight": "684b636939cd4ffcfec5a6238a0790ffa43d853c95783af9b9e8275e74071a7a",
"blk.26.ffn_down.weight": "89d0bf066db154e6d312b5433aed1714f6a28b40f4c52e3e1530ee07703303c8",
"blk.26.ffn_gate.weight": "393d649bebe5e2940e1b043649f6c860b4b8b9f380f30e9da1744a830f358156",
"blk.26.ffn_up.weight": "179edc85ababd9d8440cc6093eecd1004290aa1cb96434b26ecf7585b6cca17b",
"blk.27.attn_k.weight": "334841445a7f1e14731b08f56eb0b1f0938c63823d28bc6d078c4c5f05b36f19",
"blk.27.attn_norm.weight": "57344471bbda2e9deffdfdb2dd05a07aa47f8761e24de53525588639145bf551",
"blk.27.attn_output.weight": "506126af9ee54b535d49f97e36f630e74834f480329f098d6d62e96246d8d65a",
"blk.27.attn_q.weight": "dd984df1acb4783849e25ba7ae378bfd385cd9efc540fb798cd5bdd873f0118f",
"blk.27.attn_v.weight": "b4b3fe9a4455d34c297ff20a2f537b647cef424741d840a747b265f23d320ac0",
"blk.27.ffn_down.weight": "621fdb185ba0d35ba5476dae73d2c81ec1482a0e878d5bfd5c3b29fe837af013",
"blk.27.ffn_gate.weight": "e4fbab45f2ec506fa374103251a0bdb7baa6f576080bdd796f3e9db92098e08f",
"blk.27.ffn_up.weight": "a0c57e463e988002bbd6a6c6792baa21a65e6f89ae303a2c301951b0ae6e4bbe",
"blk.28.attn_k.weight": "bac36cbd52ec5056841663865e1291ddab4b47ef9a2544dd285d4503bfb0e4a0",
"blk.28.attn_norm.weight": "5774a9df2bbb2e86d1f70179c7b92d81e1f401160148b3328fb64db6646a5425",
"blk.28.attn_output.weight": "e8712622d1569557000c75f26c3f55fad267fd300463c2c2cfe3afbfa1c8f908",
"blk.28.attn_q.weight": "11677751fddee52cc739699c02836f7be54d96038be4240be5d4f53d00161608",
"blk.28.attn_v.weight": "e5ee459b8958d65e1445997b9aa1e90e2f5d17761ebcf5357313119a45322507",
"blk.28.ffn_down.weight": "3934518f9f85292da8475fe38a8edcbfc4e24ac56c351b472d6351f98750871e",
"blk.28.ffn_gate.weight": "6ba735d57e98d0847e487f25ffaa25256deaa8abec76f428cb70bd9774279d83",
"blk.28.ffn_up.weight": "977fae6e1e5353114fc645dd98429464749758765cbc6e6457593d596e57850c",
"blk.29.attn_k.weight": "8122a457307d580ad6f1e0acea09a2f593d97f595ba0d6737f5fea16d2433642",
"blk.29.attn_norm.weight": "d626f721e05aa1202439b01027031d4caf1adace61ed37870a277cb6297c77cc",
"blk.29.attn_output.weight": "7fb7122ab1b6b1e6615ca746897da27bc52c92cb70d3147183cdde61795b72b3",
"blk.29.attn_q.weight": "be43e94ff6b6e391024dc824101efa0ddf4005d5b002ac26cb03765c0c73c2fa",
"blk.29.attn_v.weight": "af93c85ebff908f74f9935b81bde0516ca487c84139868a1ce079c3ae20036b1",
"blk.29.ffn_down.weight": "39dae12340ed3120bd19c495fe0872b559613641e41fde69d02d8631900b84c0",
"blk.29.ffn_gate.weight": "36fd482439840ef197c9f3b8905d86acfcea49bcf018544106ca465d4bf8d5c7",
"blk.29.ffn_up.weight": "5243fbdfdc1e2a1dd84b6210a9869d18a014db9088897e345240cdc99990bd5d",
"blk.30.attn_k.weight": "948f263616bd3788b2b968baafd69b9c5bd1b77578665f096c4b7e247b4cea42",
"blk.30.attn_norm.weight": "e168df981e744874ff303faf2eb470e5f6868c2040ba5f383f6c5148669975e7",
"blk.30.attn_output.weight": "4cf0ccca04b792573b756655a24fc89cfb1f272da8305633f0bc66ef14990b93",
"blk.30.attn_q.weight": "21e07d6cba6c50d65350289258209717174a13c42be57e8141d69712cbaf32c1",
"blk.30.attn_v.weight": "65a8ca29c7237b3182ccf03e2fc94e84f9a53d0e160fb679ab401c853170dd9c",
"blk.30.ffn_down.weight": "8b00500a6d00d84058f6658ee1d6f06fb4fcae2f90d4341792259362923b3c13",
"blk.30.ffn_gate.weight": "5bc0e19ab7a31b50ac2118ad1b36e31055271a322cd8ff661d47c3ac0210703c",
"blk.30.ffn_up.weight": "f37a0561955725bd59ee2d064fa9f4e00a12a1b620b624db3bc3add5330bc321",
"blk.31.attn_k.weight": "9a5663edda227f5d87533897146764f8e8a7481b9e71fae197c39204f8463221",
"blk.31.attn_norm.weight": "060a4f438a1ee5e220b5b5278ad2f5c085a428bf38c515766781815597c87529",
"blk.31.attn_output.weight": "6ada5d3cad9dea4780ffbb43302bb6ccc2f24eddd0fc4f5f84c9ce0fc0c6e5dd",
"blk.31.attn_q.weight": "bb5d08c08603907981ad388d5d8b70fcc9b98034ba264b8474c8890cc0297af0",
"blk.31.attn_v.weight": "e01b4252ea9c6a889c32b21144b441a347464d04536ef4f6572425be55759796",
"blk.31.ffn_down.weight": "8ba4d679c36e93ba65ba03180385ef35ea86b3b7cdf2fded9df59369f1c09630",
"blk.31.ffn_gate.weight": "e5b41dc93645f8b5e8eebae3ada3ea43a18f97ce2654228655170b07b463ccb0",
"blk.31.ffn_up.weight": "25b88cdddc8b547af294ed107d3d1312e90b983cae87936fa6062ecd8ea02539",
"blk.32.attn_k.weight": "4bcf86dc0858c8ca2fbdf6aa76674d43eb698f78979fdc1a38f556a7af1facc4",
"blk.32.attn_norm.weight": "cdcc12f3b8b9773c6722736bfb748a2729230b21478cbcc4104859d3148df815",
"blk.32.attn_output.weight": "d43f1196822995ed89a9365c97054753a8b30ce20b6e273c8edcc42673a1e141",
"blk.32.attn_q.weight": "ebf2972bb3865cbc5be4840113a322089752038344beab2a0122c7cb4fb399b6",
"blk.32.attn_v.weight": "714db81704ff34fa137512903c1013acee7877467473e46600728b9240582eb7",
"blk.32.ffn_down.weight": "2cde3da1258bb170a79d5d3cdfe10c86a71eb34b77da46b74c5ed71e7f4fe274",
"blk.32.ffn_gate.weight": "c7e1ed792532613ff9d4e5834b6536e2e0f47df2303bc0fdaa90aac0c1f4e8db",
"blk.32.ffn_up.weight": "d8d6f13fe66a716e28f79101a29817f0c0d6f99969a6f017d51bafd1a16c600c",
"blk.33.attn_k.weight": "a0a28f6cbca88da00cab2ca37094d9b0503bf9defdae77b91895b911c408cbb6",
"blk.33.attn_norm.weight": "0251200c24cc8445607ace6dc8c5aa0566567997262b7cca53a11ac23cc564b2",
"blk.33.attn_output.weight": "b2423205bdf6a1096d43c44d8d12f1a84fcd4e1bb70fcf6dc8542b8b8a71a13c",
"blk.33.attn_q.weight": "00b425c3ef71065ce5e0234e702bf38143b4952da78a85f52ab2c2e3073d97ab",
"blk.33.attn_v.weight": "035edd2335df816c42c765a5e66b9d9b9e15a822a8dc1863508145499c942c14",
"blk.33.ffn_down.weight": "4894a923a3db75bae4496ba3ce5f28796ad31fe33996a066271fb8654964310e",
"blk.33.ffn_gate.weight": "8f6c819b8bbfbe3357fae89e1ac5a3d58be85b3b04be3bacf7b62775869046ff",
"blk.33.ffn_up.weight": "257c3544b5b544fd5d839665bf5caf107a329b59dbc3751efcaa24ae63c56179",
"blk.34.attn_k.weight": "b6cd8bba892e38dac4a2ebc3ba1bce49e71b967fc436fde30c6d76f54a18935f",
"blk.34.attn_norm.weight": "2b3c8e60a064cba9955752bbbbdd92c71ba5c2f1bd721097bdbe88b5abc68787",
"blk.34.attn_output.weight": "8cc272551c9aaca9db5a660c6927bab94a0243d74a30b2bc165f06bd577714ea",
"blk.34.attn_q.weight": "74b561eb4792484e6a94b58fe2583848c3ae28ff2f1bf3d02939a0cfdfa49990",
"blk.34.attn_v.weight": "dba19e24ff05154dc5a1f55c023729303a583d13d68732ce22ea74d4410dc8f0",
"blk.34.ffn_down.weight": "76eca5dfeb274c35774e0bf9f22ee420ed9085c8e99aa2cd5a236e4918b44c61",
"blk.34.ffn_gate.weight": "9af0862d5fcbc24732846488e653db8242a467765c0cdbc00332b3a40256b4a6",
"blk.34.ffn_up.weight": "2a03126bf73587eaba99ece2066103d12e47bcd4ce30ff6c17b2f383b81d40df",
"blk.35.attn_k.weight": "52513fc0cd4e997a842729af7d21dd09399bce0a339558374738be266d0fa2f0",
"blk.35.attn_norm.weight": "e5281fa911964263ccf1630b14762edbd41d0b9472d6ec695fc600fed4892c35",
"blk.35.attn_output.weight": "b391d6705d5dc6f48326b5fd16573f679edf64109d86fb729a498819676590ca",
"blk.35.attn_q.weight": "d16446921966db9b0e0539626ad22a2511ace780e59379d6a4162d8c5441440b",
"blk.35.attn_v.weight": "9d8cdf23ffdb0c5c74106843390b94b24c9f33ef0eb9998d39f78c73390101ea",
"blk.35.ffn_down.weight": "938eb6301f7bbf162d7dd965682a5ed11d0a4a530c6fedd7e5469ce80012fc17",
"blk.35.ffn_gate.weight": "5ad84f5a0c8edcfea1ecf1a3e3d21d85ceda0c4ad9e3c6ca68885eeff8ed3c2f",
"blk.35.ffn_up.weight": "1c4330d9dc71bf4c98812c34356c51f520f47610a534152aa6d29284b758090d",
"blk.36.attn_k.weight": "ef720655e5ca2465f13db2dfc4732fb4ef2c9d53acde52f514fd4f301e974081",
"blk.36.attn_norm.weight": "88f4b9310b3c8c2644e3029160cd35678c79dfa59280430e03f5c29a6fe84a58",
"blk.36.attn_output.weight": "aec6f915fffd7bb72cd783273e871b4f09605950089d45e72059d1316b6c4b01",
"blk.36.attn_q.weight": "72f9408a2405d42f8db6ce5fcf1d26a3660b6f225fc60e77d0277109cfcb82ed",
"blk.36.attn_v.weight": "0f3b3d851dc44b3893ef53f6cca5b4acc9658bacfe1cc2d13c3d704ddd409b67",
"blk.36.ffn_down.weight": "470aec48ce8c5129a6654d9fd26fcae72776f9fc1429a8bb05818072a876475d",
"blk.36.ffn_gate.weight": "7f5f296d09cf55679767b5d15de3eff489c456782119f25204be4b1647f18dcf",
"blk.36.ffn_up.weight": "b7ef74a1f7ffb4982711d93f1787be3a70edc3d2358d5203c41d8900508037d4",
"blk.37.attn_k.weight": "c4ffa5412e4ff2dcfe1aed991c1f54169fd171a4c7638e4b9f21a1ca64c5e1d6",
"blk.37.attn_norm.weight": "4eb6c888d841cccfacf5b963f8611120f6ff24b84af0b5714fd9ab36dcda422f",
"blk.37.attn_output.weight": "db2a7bbf9682f9f6eea672dae8e150738f1bf74dbc80edc7022017a3f040c8ac",
"blk.37.attn_q.weight": "e38c0462aff139afcbab289189823527e453abc9e541154adde5e7af88cacf0b",
"blk.37.attn_v.weight": "952eb2492ed452a72f96bcc12d4b2affad9dfdf46ee39ce4a5d7b57a5dc301e5",
"blk.37.ffn_down.weight": "25f23a8fbc44febf6dc4848fd7fe03a580e2822bd3b3b5a51f4990826bfe3e4e",
"blk.37.ffn_gate.weight": "707da5eb40118b035305d3262444382351f170a20a537386a70e90c5a83a7817",
"blk.37.ffn_up.weight": "d2d2ba5cfc4ef47338dd7384219e22bf030a5a2209e0354d88f5bbaaafd20e87",
"blk.38.attn_k.weight": "abc4bb189dedf7ce661e79028427623a4f91ac091c2cd60e31b58bc62b1cda71",
"blk.38.attn_norm.weight": "9f4803a7d03fd40fcb83d85f84eb1d5682ea4e5bb084f210c02850675d804c3d",
"blk.38.attn_output.weight": "77cb66007f1a41df7135d0e7f900ceb499c2f667dfc3f1a6ac01a3203bbd3ccf",
"blk.38.attn_q.weight": "d94a8b26cd375bf2bcaa76597e314aa8268ee50a479d00931e5e0e021feadb5d",
"blk.38.attn_v.weight": "660c907888bc5016dc69b7d35fe6f55c7ded697c93be0e2d332a2f17aff88758",
"blk.38.ffn_down.weight": "6f06173bae5b00ffaf88ef383619a8b9c6a8d0d5c6494695d17f6c1de1a68a13",
"blk.38.ffn_gate.weight": "89f99be149d03f116527bfcabe073c50001c874de40fb6e817f6619027f3cd05",
"blk.38.ffn_up.weight": "8d57557c8d5e2d2688b73f01dddf1ce8d5194990cda6358153320aea88aac7f8",
"blk.39.attn_k.weight": "21be09c988b46c8393e6c2ec9230f3b5136eb7607dd1953ba92d0811c2f0dd75",
"blk.39.attn_norm.weight": "ba7c1912dd1c4e2d16917201f62396fd0600e4a451137eaddff255548c209abd",
"blk.39.attn_output.weight": "acfaf4abb3fd27fd899b5563c3877f176b597d8f6cdb2f2fd3f3a0bd4da15ed6",
"blk.39.attn_q.weight": "e8adbc140d4c8f0db2a27ca584c5531d5b1e080555fe627e34d80d0814a92bed",
"blk.39.attn_v.weight": "92f96b0e1f724e73a0f90a76c145654418844c04a6d4b14c05eb5af8a62bf8dc",
"blk.39.ffn_down.weight": "4d9ee7c65fc16fe95d10c47b79ac6a525741947600a64b5fcea5d300a82c50de",
"blk.39.ffn_gate.weight": "7e18507989f39b32191133d2657c2ee3b74f42f070579204d727eb72215793d1",
"blk.39.ffn_up.weight": "22cda752269c9757ba918abede1df95bb0f83a5c772dea13c8deea3d5f2723d9",
"output_norm.weight": "2858cf0e39d32caf52b7861378ace076000241e147f10b9eb21d8a5cd149e3cb"
}

View File

File diff suppressed because one or more lines are too long

View File

@@ -718,23 +718,18 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
func LibraryDirs() []string {
// dependencies can exist wherever we found the runners (e.g. build tree for developers) and relative to the executable
// This can be simplified once we no longer carry runners as payloads
paths := []string{}
appExe, err := os.Executable()
exe, err := os.Executable()
if err != nil {
slog.Warn("failed to lookup executable path", "error", err)
} else {
appRelative := filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe(), "lib", "ollama")
if _, err := os.Stat(appRelative); err == nil {
paths = append(paths, appRelative)
}
return nil
}
rDir := runners.Locate()
if err != nil {
slog.Warn("unable to locate gpu dependency libraries", "error", err)
} else {
paths = append(paths, filepath.Dir(rDir))
lib := filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama")
if _, err := os.Stat(lib); err != nil {
return nil
}
return paths
return []string{lib}
}
func GetSystemInfo() SystemInfo {

View File

@@ -2,7 +2,7 @@
### Getting Started
* [Quickstart](../README.md#quickstart)
* [Examples](./examples.md)
* [Examples](../examples)
* [Importing models](./import.md)
* [Linux Documentation](./linux.md)
* [Windows Documentation](./windows.md)

View File

@@ -1,15 +1,15 @@
package llm
package ggml
import (
"encoding/binary"
"errors"
"fmt"
"io"
"log/slog"
"slices"
"strings"
"sync"
"github.com/ollama/ollama/util/bufioutil"
"github.com/ollama/ollama/fs/util/bufioutil"
)
type GGML struct {
@@ -19,145 +19,168 @@ type GGML struct {
type model interface {
KV() KV
Tensors() *Tensors
Tensors() Tensors
}
type KV map[string]any
func (kv KV) u64(key string) uint64 {
switch v := kv[key].(type) {
case uint64:
return v
case uint32:
return uint64(v)
case float64:
return uint64(v)
default:
return 0
}
}
func (kv KV) Architecture() string {
if s, ok := kv["general.architecture"].(string); ok {
return s
}
return "unknown"
return kv.String("general.architecture", "unknown")
}
func (kv KV) Kind() string {
if s, ok := kv["general.type"].(string); ok {
return s
}
return "unknown"
return kv.String("general.type", "unknown")
}
func (kv KV) ParameterCount() uint64 {
return kv.u64("general.parameter_count")
return keyValue[uint64](kv, "general.parameter_count")
}
func (kv KV) FileType() fileType {
if u64 := kv.u64("general.file_type"); u64 > 0 {
return fileType(uint32(u64))
if t := kv.Uint("general.file_type"); t > 0 {
return fileType(t)
}
return fileTypeUnknown
}
func (kv KV) BlockCount() uint64 {
return kv.u64(fmt.Sprintf("%s.block_count", kv.Architecture()))
return uint64(kv.Uint("block_count"))
}
func (kv KV) EmbeddingLength() uint64 {
return uint64(kv.Uint("embedding_length"))
}
func (kv KV) HeadCount() uint64 {
return kv.u64(fmt.Sprintf("%s.attention.head_count", kv.Architecture()))
return uint64(kv.Uint("attention.head_count"))
}
func (kv KV) HeadCountKV() uint64 {
if headCountKV := kv.u64(fmt.Sprintf("%s.attention.head_count_kv", kv.Architecture())); headCountKV > 0 {
return headCountKV
}
return 1
return uint64(kv.Uint("attention.head_count_kv", 1))
}
func (kv KV) EmbeddingHeadCount() uint64 {
if heads := kv.HeadCount(); heads > 0 {
return kv.EmbeddingLength() / kv.HeadCount()
return kv.EmbeddingLength() / heads
}
return 0
}
func (kv KV) EmbeddingHeadCountK() uint64 {
if k := kv.u64(fmt.Sprintf("%s.attention.key_length", kv.Architecture())); k > 0 {
return k
}
return kv.EmbeddingHeadCount()
return uint64(kv.Uint("attention.key_length", uint32(kv.EmbeddingHeadCount())))
}
func (kv KV) EmbeddingHeadCountV() uint64 {
if v := kv.u64(fmt.Sprintf("%s.attention.value_length", kv.Architecture())); v > 0 {
return v
}
return kv.EmbeddingHeadCount()
return uint64(kv.Uint("attention.value_length", uint32(kv.EmbeddingHeadCount())))
}
func (kv KV) GQA() uint64 {
return kv.HeadCount() / kv.HeadCountKV()
}
func (kv KV) EmbeddingLength() uint64 {
return kv.u64(fmt.Sprintf("%s.embedding_length", kv.Architecture()))
}
func (kv KV) ContextLength() uint64 {
return kv.u64(fmt.Sprintf("%s.context_length", kv.Architecture()))
return uint64(kv.Uint("context_length"))
}
func (kv KV) ChatTemplate() string {
s, _ := kv["tokenizer.chat_template"].(string)
return kv.String("tokenizer.chat_template")
}
func (kv KV) String(key string, defaultValue ...string) string {
return keyValue(kv, key, append(defaultValue, "")...)
}
func (kv KV) Uint(key string, defaultValue ...uint32) uint32 {
return keyValue(kv, key, append(defaultValue, 0)...)
}
func (kv KV) Float(key string, defaultValue ...float32) float32 {
return keyValue(kv, key, append(defaultValue, 0)...)
}
func (kv KV) Strings(key string, defaultValue ...[]string) []string {
r := keyValue(kv, key, &array{})
s := make([]string, r.size)
for i := range r.size {
s[i] = r.values[i].(string)
}
return s
}
type Tensors struct {
Items []*Tensor
Offset uint64
func (kv KV) Uints(key string, defaultValue ...[]uint32) []uint32 {
r := keyValue(kv, key, &array{})
s := make([]uint32, r.size)
for i := range r.size {
s[i] = uint32(r.values[i].(int32))
}
layers map[string]Layer
layersOnce sync.Once
return s
}
func (ts *Tensors) Layers() map[string]Layer {
ts.layersOnce.Do(func() {
ts.layers = make(map[string]Layer)
for _, t := range ts.Items {
parts := strings.Split(t.Name, ".")
if index := slices.IndexFunc(parts, func(s string) bool { return s == "blk" || s == "mm" }); index != -1 {
if len(parts) > index+2 {
// blk and mm should have a number after them, join it
parts = append(
[]string{strings.Join(parts[:index+2], ".")},
parts[index+2:]...)
}
}
func keyValue[T string | uint32 | uint64 | float32 | *array](kv KV, key string, defaultValue ...T) T {
if !strings.HasPrefix(key, "tokenizer.") && !strings.HasPrefix(key, "general.") {
key = kv.Architecture() + "." + key
}
if _, ok := ts.layers[parts[0]]; !ok {
ts.layers[parts[0]] = make(Layer)
}
if val, ok := kv[key]; ok {
return val.(T)
}
ts.layers[parts[0]][strings.Join(parts[1:], ".")] = t
slog.Warn("key not found", "key", key, "default", defaultValue[0])
return defaultValue[0]
}
type Tensors struct {
items []*Tensor
Offset uint64
}
func (s Tensors) Items(prefix ...string) []*Tensor {
if len(prefix) == 0 {
return s.items
}
var items []*Tensor
for _, t := range s.items {
if strings.HasPrefix(t.Name, prefix[0]) {
items = append(items, t)
}
})
}
return ts.layers
return items
}
func (ts Tensors) Layers() map[string]Layer {
layers := make(map[string]Layer)
for _, t := range ts.items {
parts := strings.Split(t.Name, ".")
if i := slices.Index(parts, "blk"); i > 0 {
parts = append([]string{
strings.Join(parts[:i], "."),
strings.Join(parts[i:i+2], "."),
}, parts[i+2:]...)
} else if i == 0 {
parts = append([]string{
strings.Join(parts[i:i+2], "."),
}, parts[i+2:]...)
}
if _, ok := layers[parts[0]]; !ok {
layers[parts[0]] = make(Layer)
}
layers[parts[0]][strings.Join(parts[1:], ".")] = t
}
return layers
}
type Layer map[string]*Tensor
func (l Layer) size() (size uint64) {
func (l Layer) Size() (size uint64) {
for _, t := range l {
size += t.Size()
}
@@ -255,8 +278,6 @@ func (t Tensor) typeSize() uint64 {
return 8
case 29: // IQ1_M
return blockSize/8 + blockSize/16 + blockSize/32
case 30: // BF16
return 2
default:
return 0
}
@@ -295,7 +316,7 @@ const (
var ErrUnsupportedFormat = errors.New("unsupported model format")
func DetectGGMLType(b []byte) string {
func DetectContentType(b []byte) string {
switch binary.LittleEndian.Uint32(b[:4]) {
case FILE_MAGIC_GGML:
return "ggml"
@@ -312,12 +333,12 @@ func DetectGGMLType(b []byte) string {
}
}
// DecodeGGML decodes a GGML model from the given reader.
// Decode decodes a GGML model from the given reader.
//
// It collects array values for arrays with a size less than or equal to
// maxArraySize. If maxArraySize is 0, the default value of 1024 is used. If
// the maxArraySize is negative, all arrays are collected.
func DecodeGGML(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
if maxArraySize == 0 {
maxArraySize = 1024
}
@@ -331,10 +352,6 @@ func DecodeGGML(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
var c container
switch magic {
case FILE_MAGIC_GGML, FILE_MAGIC_GGMF, FILE_MAGIC_GGJT:
return nil, 0, ErrUnsupportedFormat
case FILE_MAGIC_GGLA:
c = &containerGGLA{}
case FILE_MAGIC_GGUF_LE:
c = &containerGGUF{ByteOrder: binary.LittleEndian, maxArraySize: maxArraySize}
case FILE_MAGIC_GGUF_BE:
@@ -530,21 +547,20 @@ func (llm GGML) GraphSize(context, batch uint64, kvCacheType string) (kv, partia
}
// SupportsKVCacheType checks if the requested cache type is supported
func (ggml GGML) SupportsKVCacheType(cacheType string) bool {
validKVCacheTypes := []string{"f16", "q8_0", "q4_0"}
return slices.Contains(validKVCacheTypes, cacheType)
func (llm GGML) SupportsKVCacheType(cacheType string) bool {
return slices.Contains([]string{"f16", "q8_0", "q4_0"}, cacheType)
}
// SupportsFlashAttention checks if the model supports flash attention
func (ggml GGML) SupportsFlashAttention() bool {
_, isEmbedding := ggml.KV()[fmt.Sprintf("%s.pooling_type", ggml.KV().Architecture())]
func (llm GGML) SupportsFlashAttention() bool {
_, isEmbedding := llm.KV()[fmt.Sprintf("%s.pooling_type", llm.KV().Architecture())]
if isEmbedding {
return false
}
// Check head counts match and are non-zero
headCountK := ggml.KV().EmbeddingHeadCountK()
headCountV := ggml.KV().EmbeddingHeadCountV()
headCountK := llm.KV().EmbeddingHeadCountK()
headCountV := llm.KV().EmbeddingHeadCountV()
return headCountK != 0 && headCountV != 0 && headCountK == headCountV
}

View File

@@ -1,4 +1,4 @@
package llm
package ggml
import (
"bytes"
@@ -8,10 +8,9 @@ import (
"fmt"
"io"
"log/slog"
"maps"
"slices"
"strings"
"golang.org/x/exp/maps"
)
type containerGGUF struct {
@@ -110,9 +109,9 @@ func (llm *gguf) KV() KV {
return llm.kv
}
func (llm *gguf) Tensors() *Tensors {
return &Tensors{
Items: llm.tensors,
func (llm *gguf) Tensors() Tensors {
return Tensors{
items: llm.tensors,
Offset: llm.tensorOffset,
}
}
@@ -523,7 +522,7 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
return err
}
keys := maps.Keys(kv)
keys := slices.Collect(maps.Keys(kv))
slices.Sort(keys)
for _, key := range keys {

View File

@@ -1,4 +1,4 @@
package llm
package ggml
import "fmt"
@@ -32,10 +32,9 @@ const (
fileTypeIQ1_S
fileTypeIQ4_NL
fileTypeIQ3_S
fileTypeIQ3_M
fileTypeIQ2_S
fileTypeIQ2_M
fileTypeIQ4_XS
fileTypeIQ2_M
fileTypeIQ1_M
fileTypeBF16
@@ -94,8 +93,6 @@ func ParseFileType(s string) (fileType, error) {
return fileTypeIQ4_NL, nil
case "IQ3_S":
return fileTypeIQ3_S, nil
case "IQ3_M":
return fileTypeIQ3_M, nil
case "IQ2_S":
return fileTypeIQ2_S, nil
case "IQ4_XS":
@@ -163,8 +160,6 @@ func (t fileType) String() string {
return "IQ4_NL"
case fileTypeIQ3_S:
return "IQ3_S"
case fileTypeIQ3_M:
return "IQ3_M"
case fileTypeIQ2_S:
return "IQ2_S"
case fileTypeIQ4_XS:

6
go.mod
View File

@@ -17,12 +17,15 @@ require (
require (
github.com/agnivade/levenshtein v1.1.1
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
github.com/dlclark/regexp2 v1.11.4
github.com/emirpasic/gods/v2 v2.0.0-alpha
github.com/google/go-cmp v0.6.0
github.com/mattn/go-runewidth v0.0.14
github.com/nlpodyssey/gopickle v0.3.0
github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
golang.org/x/image v0.22.0
golang.org/x/tools v0.28.0
gonum.org/v1/gonum v0.15.0
)
require (
@@ -42,7 +45,6 @@ require (
github.com/xtgo/set v1.0.0 // indirect
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
gonum.org/v1/gonum v0.15.0 // indirect
gorgonia.org/vecf32 v0.9.0 // indirect
gorgonia.org/vecf64 v0.9.0 // indirect
)
@@ -70,7 +72,7 @@ require (
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.31.0
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa
golang.org/x/net v0.25.0 // indirect
golang.org/x/net v0.32.0 // indirect
golang.org/x/sys v0.28.0
golang.org/x/term v0.27.0
golang.org/x/text v0.21.0

8
go.sum
View File

@@ -42,6 +42,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+UbP35JkH8yB7MYb4q/qhBarqZE6g=
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/emirpasic/gods/v2 v2.0.0-alpha h1:dwFlh8pBg1VMOXWGipNMRt8v96dKAIvBehtCt6OtunU=
github.com/emirpasic/gods/v2 v2.0.0-alpha/go.mod h1:W0y4M2dtBB9U5z3YlghmpuUhiaZT2h6yoeE+C1sCp6A=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
@@ -255,8 +257,8 @@ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81R
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI=
golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -307,6 +309,8 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapK
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8=
golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

22
grammar/bench_test.go Normal file
View File

@@ -0,0 +1,22 @@
//go:build go1.24
package grammar
import "testing"
func BenchmarkFromSchema(b *testing.B) {
for tt := range testCases(b) {
b.Run("", func(b *testing.B) {
s := []byte(tt.schema)
b.ReportAllocs()
for b.Loop() {
_, err := FromSchema(nil, s)
if err != nil {
b.Fatalf("GrammarFromSchema: %v", err)
}
}
})
return
}
}

227
grammar/grammar.go Normal file
View File

@@ -0,0 +1,227 @@
package grammar
import (
"bytes"
"encoding/json"
"fmt"
"iter"
"strconv"
"github.com/ollama/ollama/grammar/jsonschema"
)
const jsonTerms = `
# Unicode
#
# Unicode characters can be specified directly in the grammar, for example
# hiragana ::= [ぁ-ゟ], or with escapes: 8-bit (\xXX), 16-bit (\uXXXX) or 32-bit
# (\UXXXXXXXX).
unicode ::= \x{hex}{2} | \u{hex}{4} | \U{hex}{8}
# JSON grammar from RFC 7159
null ::= "null"
object ::= "{" (kv ("," kv)*)? "}"
array ::= "[" (value ("," value)*)? "]"
kv ::= string ":" value
integer ::= "0" | [1-9] [0-9]*
number ::= "-"? integer frac? exp?
frac ::= "." [0-9]+
exp ::= ("e" | "E") ("+" | "-") [0-9]+
string ::= "\"" char* "\""
escape ::= ["/" | "b" | "f" | "n" | "r" | "t" | unicode]
char ::= [^"\\] | escape
space ::= (" " | "\t" | "\n" | "\r")*
hex ::= [0-9] | [a-f] | [A-F]
boolean ::= "true" | "false"
value ::= object | array | string | number | boolean | "null"
# User-defined
`
// FromSchema generates a grammar from a JSON schema.
func FromSchema(buf []byte, jsonSchema []byte) ([]byte, error) {
var s *jsonschema.Schema
if err := json.Unmarshal(jsonSchema, &s); err != nil {
return nil, err
}
var g builder
// "root" is the only rule that is guaranteed to exist, so we start
// with its length for padding, and then adjust it as we go.
g.pad = len("root")
for id := range dependencies("root", s) {
g.pad = max(g.pad, len(id))
}
g.b.WriteString(jsonTerms)
ids := make(map[*jsonschema.Schema]string)
for id, s := range dependencies("root", s) {
ids[s] = id
g.define(id)
if err := fromSchema(&g, ids, s); err != nil {
return nil, err
}
}
g.define("root")
if err := fromSchema(&g, ids, s); err != nil {
return nil, err
}
g.define("") // finalize the last rule
return g.b.Bytes(), nil
}
func fromSchema(g *builder, ids map[*jsonschema.Schema]string, s *jsonschema.Schema) error {
switch typ := s.EffectiveType(); typ {
case "array":
if len(s.PrefixItems) == 0 && s.Items == nil {
g.u("array")
} else {
g.q("[")
for i, s := range s.PrefixItems {
if i > 0 {
g.q(",")
}
g.u(ids[s])
}
if s.Items != nil {
g.u("(")
if len(s.PrefixItems) > 0 {
g.q(",")
}
g.u(ids[s.Items])
g.u(")*")
}
g.q("]")
}
case "object":
if len(s.Properties) == 0 {
g.u("object")
} else {
g.q("{")
for i, p := range s.Properties {
name := ids[p]
if i > 0 {
g.q(",")
}
g.q(p.Name)
g.q(":")
g.u(name)
}
g.q("}")
}
case "number":
buildConstrainedNumber(g, s)
case "string":
if len(s.Enum) == 0 {
g.u("string")
} else {
g.u("(")
for i, e := range s.Enum {
if i > 0 {
g.q("|")
}
g.q(string(e))
}
g.u(")")
}
case "boolean", "value", "null", "integer":
g.u(typ)
default:
return fmt.Errorf("%s: unsupported type %q", s.Name, typ)
}
return nil
}
// dependencies returns a sequence of all child dependencies of the schema in
// post-order.
//
// The first value is the id/pointer to the dependency, and the second value
// is the schema.
func dependencies(id string, s *jsonschema.Schema) iter.Seq2[string, *jsonschema.Schema] {
return func(yield func(string, *jsonschema.Schema) bool) {
for i, p := range s.Properties {
id := fmt.Sprintf("%s_%d", id, i)
for did, d := range dependencies(id, p) {
if !yield(did, d) {
return
}
}
if !yield(id, p) {
return
}
}
for i, p := range s.PrefixItems {
id := fmt.Sprintf("tuple_%d", i)
for did, d := range dependencies(id, p) {
id := fmt.Sprintf("%s_%s", id, did)
if !yield(id, d) {
return
}
}
if !yield(id, p) {
return
}
}
if s.Items != nil {
id := fmt.Sprintf("%s_tuple_%d", id, len(s.PrefixItems))
for did, d := range dependencies(id, s.Items) {
if !yield(did, d) {
return
}
}
if !yield(id, s.Items) {
return
}
}
}
}
type builder struct {
b bytes.Buffer
pad int
rules int
items int
}
// define terminates the current rule, if any, and then either starts a new
// rule or does nothing else if the name is empty.
func (b *builder) define(name string) {
if b.rules > 0 {
b.b.WriteString(";\n")
}
if name == "" {
return
}
fmt.Fprintf(&b.b, "% -*s", b.pad, name)
b.b.WriteString(" ::=")
b.rules++
b.items = 0
}
// quote appends a terminal to the current rule.
func (b *builder) q(s string) {
if b.items > 0 {
b.b.WriteString(" ")
}
b.b.WriteString(" ")
b.b.WriteString(strconv.Quote(s))
}
// u appends a non-terminal to the current rule.
func (b *builder) u(s string) {
if b.items > 0 {
b.b.WriteString(" ")
}
b.b.WriteString(" ")
b.b.WriteString(s)
}
func buildConstrainedNumber(b *builder, s *jsonschema.Schema) {
if s.Minimum == 0 && s.Maximum == 0 {
b.u("TODO")
} else {
b.u("number")
}
}

75
grammar/grammar_test.go Normal file
View File

@@ -0,0 +1,75 @@
package grammar
import (
"bufio"
"cmp"
"iter"
"strings"
"testing"
_ "embed"
"github.com/ollama/ollama/grammar/internal/diff"
)
func TestFromSchema(t *testing.T) {
for tt := range testCases(t) {
t.Run(tt.name, func(t *testing.T) {
g, err := FromSchema(nil, []byte(tt.schema))
if err != nil {
t.Fatalf("FromSchema: %v", err)
}
got := string(g)
got = strings.TrimPrefix(got, jsonTerms)
if got != tt.want {
t.Logf("schema:\n%s", tt.schema)
t.Fatal(string(diff.Diff("got", []byte(got), "want", []byte(tt.want))))
}
})
}
}
type testCase struct {
name string
schema string
want string
}
//go:embed testdata/schemas.txt
var tests string
func testCases(t testing.TB) iter.Seq[testCase] {
t.Helper()
return func(yield func(testCase) bool) {
t.Helper()
sc := bufio.NewScanner(strings.NewReader(tests))
name := ""
for sc.Scan() {
line := strings.TrimSpace(sc.Text())
if line == "" {
name = ""
continue
}
if line[0] == '#' {
name = cmp.Or(name, strings.TrimSpace(line[1:]))
continue
}
s := sc.Text()
g := ""
for sc.Scan() {
line = strings.TrimSpace(sc.Text())
if line == "" || line[0] == '#' {
break
}
g += sc.Text() + "\n"
}
if !yield(testCase{name, s, g}) {
return
}
name = strings.TrimSpace(strings.TrimPrefix(line, "#"))
}
if err := sc.Err(); err != nil {
t.Fatalf("error reading tests: %v", err)
}
}
}

View File

@@ -0,0 +1,261 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package diff
import (
"bytes"
"fmt"
"sort"
"strings"
)
// A pair is a pair of values tracked for both the x and y side of a diff.
// It is typically a pair of line indexes.
type pair struct{ x, y int }
// Diff returns an anchored diff of the two texts old and new
// in the “unified diff” format. If old and new are identical,
// Diff returns a nil slice (no output).
//
// Unix diff implementations typically look for a diff with
// the smallest number of lines inserted and removed,
// which can in the worst case take time quadratic in the
// number of lines in the texts. As a result, many implementations
// either can be made to run for a long time or cut off the search
// after a predetermined amount of work.
//
// In contrast, this implementation looks for a diff with the
// smallest number of “unique” lines inserted and removed,
// where unique means a line that appears just once in both old and new.
// We call this an “anchored diff” because the unique lines anchor
// the chosen matching regions. An anchored diff is usually clearer
// than a standard diff, because the algorithm does not try to
// reuse unrelated blank lines or closing braces.
// The algorithm also guarantees to run in O(n log n) time
// instead of the standard O(n²) time.
//
// Some systems call this approach a “patience diff,” named for
// the “patience sorting” algorithm, itself named for a solitaire card game.
// We avoid that name for two reasons. First, the name has been used
// for a few different variants of the algorithm, so it is imprecise.
// Second, the name is frequently interpreted as meaning that you have
// to wait longer (to be patient) for the diff, meaning that it is a slower algorithm,
// when in fact the algorithm is faster than the standard one.
func Diff(oldName string, old []byte, newName string, new []byte) []byte {
if bytes.Equal(old, new) {
return nil
}
x := lines(old)
y := lines(new)
// Print diff header.
var out bytes.Buffer
fmt.Fprintf(&out, "diff %s %s\n", oldName, newName)
fmt.Fprintf(&out, "--- %s\n", oldName)
fmt.Fprintf(&out, "+++ %s\n", newName)
// Loop over matches to consider,
// expanding each match to include surrounding lines,
// and then printing diff chunks.
// To avoid setup/teardown cases outside the loop,
// tgs returns a leading {0,0} and trailing {len(x), len(y)} pair
// in the sequence of matches.
var (
done pair // printed up to x[:done.x] and y[:done.y]
chunk pair // start lines of current chunk
count pair // number of lines from each side in current chunk
ctext []string // lines for current chunk
)
for _, m := range tgs(x, y) {
if m.x < done.x {
// Already handled scanning forward from earlier match.
continue
}
// Expand matching lines as far as possible,
// establishing that x[start.x:end.x] == y[start.y:end.y].
// Note that on the first (or last) iteration we may (or definitely do)
// have an empty match: start.x==end.x and start.y==end.y.
start := m
for start.x > done.x && start.y > done.y && x[start.x-1] == y[start.y-1] {
start.x--
start.y--
}
end := m
for end.x < len(x) && end.y < len(y) && x[end.x] == y[end.y] {
end.x++
end.y++
}
// Emit the mismatched lines before start into this chunk.
// (No effect on first sentinel iteration, when start = {0,0}.)
for _, s := range x[done.x:start.x] {
ctext = append(ctext, "-"+s)
count.x++
}
for _, s := range y[done.y:start.y] {
ctext = append(ctext, "+"+s)
count.y++
}
// If we're not at EOF and have too few common lines,
// the chunk includes all the common lines and continues.
const C = 3 // number of context lines
if (end.x < len(x) || end.y < len(y)) &&
(end.x-start.x < C || (len(ctext) > 0 && end.x-start.x < 2*C)) {
for _, s := range x[start.x:end.x] {
ctext = append(ctext, " "+s)
count.x++
count.y++
}
done = end
continue
}
// End chunk with common lines for context.
if len(ctext) > 0 {
n := end.x - start.x
if n > C {
n = C
}
for _, s := range x[start.x : start.x+n] {
ctext = append(ctext, " "+s)
count.x++
count.y++
}
done = pair{start.x + n, start.y + n}
// Format and emit chunk.
// Convert line numbers to 1-indexed.
// Special case: empty file shows up as 0,0 not 1,0.
if count.x > 0 {
chunk.x++
}
if count.y > 0 {
chunk.y++
}
fmt.Fprintf(&out, "@@ -%d,%d +%d,%d @@\n", chunk.x, count.x, chunk.y, count.y)
for _, s := range ctext {
out.WriteString(s)
}
count.x = 0
count.y = 0
ctext = ctext[:0]
}
// If we reached EOF, we're done.
if end.x >= len(x) && end.y >= len(y) {
break
}
// Otherwise start a new chunk.
chunk = pair{end.x - C, end.y - C}
for _, s := range x[chunk.x:end.x] {
ctext = append(ctext, " "+s)
count.x++
count.y++
}
done = end
}
return out.Bytes()
}
// lines returns the lines in the file x, including newlines.
// If the file does not end in a newline, one is supplied
// along with a warning about the missing newline.
func lines(x []byte) []string {
l := strings.SplitAfter(string(x), "\n")
if l[len(l)-1] == "" {
l = l[:len(l)-1]
} else {
// Treat last line as having a message about the missing newline attached,
// using the same text as BSD/GNU diff (including the leading backslash).
l[len(l)-1] += "\n\\ No newline at end of file\n"
}
return l
}
// tgs returns the pairs of indexes of the longest common subsequence
// of unique lines in x and y, where a unique line is one that appears
// once in x and once in y.
//
// The longest common subsequence algorithm is as described in
// Thomas G. Szymanski, “A Special Case of the Maximal Common
// Subsequence Problem,” Princeton TR #170 (January 1975),
// available at https://research.swtch.com/tgs170.pdf.
func tgs(x, y []string) []pair {
// Count the number of times each string appears in a and b.
// We only care about 0, 1, many, counted as 0, -1, -2
// for the x side and 0, -4, -8 for the y side.
// Using negative numbers now lets us distinguish positive line numbers later.
m := make(map[string]int)
for _, s := range x {
if c := m[s]; c > -2 {
m[s] = c - 1
}
}
for _, s := range y {
if c := m[s]; c > -8 {
m[s] = c - 4
}
}
// Now unique strings can be identified by m[s] = -1+-4.
//
// Gather the indexes of those strings in x and y, building:
// xi[i] = increasing indexes of unique strings in x.
// yi[i] = increasing indexes of unique strings in y.
// inv[i] = index j such that x[xi[i]] = y[yi[j]].
var xi, yi, inv []int
for i, s := range y {
if m[s] == -1+-4 {
m[s] = len(yi)
yi = append(yi, i)
}
}
for i, s := range x {
if j, ok := m[s]; ok && j >= 0 {
xi = append(xi, i)
inv = append(inv, j)
}
}
// Apply Algorithm A from Szymanski's paper.
// In those terms, A = J = inv and B = [0, n).
// We add sentinel pairs {0,0}, and {len(x),len(y)}
// to the returned sequence, to help the processing loop.
J := inv
n := len(xi)
T := make([]int, n)
L := make([]int, n)
for i := range T {
T[i] = n + 1
}
for i := range n {
k := sort.Search(n, func(k int) bool {
return T[k] >= J[i]
})
T[k] = J[i]
L[i] = k + 1
}
k := 0
for _, v := range L {
if k < v {
k = v
}
}
seq := make([]pair, 2+k)
seq[1+k] = pair{len(x), len(y)} // sentinel at end
lastj := n
for i := n - 1; i >= 0; i-- {
if L[i] == k && J[i] < lastj {
seq[k] = pair{xi[i], yi[J[i]]}
k--
}
}
seq[0] = pair{0, 0} // sentinel at start
return seq
}

View File

@@ -0,0 +1,44 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package diff
import (
"bytes"
"path/filepath"
"testing"
"golang.org/x/tools/txtar"
)
func clean(text []byte) []byte {
text = bytes.ReplaceAll(text, []byte("$\n"), []byte("\n"))
text = bytes.TrimSuffix(text, []byte("^D\n"))
return text
}
func Test(t *testing.T) {
files, _ := filepath.Glob("testdata/*.txt")
if len(files) == 0 {
t.Fatalf("no testdata")
}
for _, file := range files {
t.Run(filepath.Base(file), func(t *testing.T) {
a, err := txtar.ParseFile(file)
if err != nil {
t.Fatal(err)
}
if len(a.Files) != 3 || a.Files[2].Name != "diff" {
t.Fatalf("%s: want three files, third named \"diff\"", file)
}
diffs := Diff(a.Files[0].Name, clean(a.Files[0].Data), a.Files[1].Name, clean(a.Files[1].Data))
want := clean(a.Files[2].Data)
if !bytes.Equal(diffs, want) {
t.Fatalf("%s: have:\n%s\nwant:\n%s\n%s", file,
diffs, want, Diff("have", diffs, "want", want))
}
})
}
}

View File

@@ -0,0 +1,13 @@
-- old --
-- new --
a
b
c
-- diff --
diff old new
--- old
+++ new
@@ -0,0 +1,3 @@
+a
+b
+c

View File

@@ -0,0 +1,13 @@
-- old --
a
b
c
-- new --
-- diff --
diff old new
--- old
+++ new
@@ -1,3 +0,0 @@
-a
-b
-c

View File

@@ -0,0 +1,35 @@
Example from Hunt and McIlroy, “An Algorithm for Differential File Comparison.”
https://www.cs.dartmouth.edu/~doug/diff.pdf
-- old --
a
b
c
d
e
f
g
-- new --
w
a
b
x
y
z
e
-- diff --
diff old new
--- old
+++ new
@@ -1,7 +1,7 @@
+w
a
b
-c
-d
+x
+y
+z
e
-f
-g

40
grammar/internal/diff/testdata/dups.txt vendored Normal file
View File

@@ -0,0 +1,40 @@
-- old --
a
b
c
d
e
f
-- new --
a
B
C
d
e
f
-- diff --
diff old new
--- old
+++ new
@@ -1,8 +1,8 @@
a
$
-b
-
-c
+B
+
+C
$
d
$

38
grammar/internal/diff/testdata/end.txt vendored Normal file
View File

@@ -0,0 +1,38 @@
-- old --
1
2
3
4
5
6
7
eight
nine
ten
eleven
-- new --
1
2
3
4
5
6
7
8
9
10
-- diff --
diff old new
--- old
+++ new
@@ -5,7 +5,6 @@
5
6
7
-eight
-nine
-ten
-eleven
+8
+9
+10

View File

@@ -0,0 +1,9 @@
-- old --
a
b
c^D
-- new --
a
b
c^D
-- diff --

18
grammar/internal/diff/testdata/eof1.txt vendored Normal file
View File

@@ -0,0 +1,18 @@
-- old --
a
b
c
-- new --
a
b
c^D
-- diff --
diff old new
--- old
+++ new
@@ -1,3 +1,3 @@
a
b
-c
+c
\ No newline at end of file

18
grammar/internal/diff/testdata/eof2.txt vendored Normal file
View File

@@ -0,0 +1,18 @@
-- old --
a
b
c^D
-- new --
a
b
c
-- diff --
diff old new
--- old
+++ new
@@ -1,3 +1,3 @@
a
b
-c
\ No newline at end of file
+c

62
grammar/internal/diff/testdata/long.txt vendored Normal file
View File

@@ -0,0 +1,62 @@
-- old --
1
2
3
4
5
6
7
8
9
10
11
12
13
14
14½
15
16
17
18
19
20
-- new --
1
2
3
4
5
6
8
9
10
11
12
13
14
17
18
19
20
-- diff --
diff old new
--- old
+++ new
@@ -4,7 +4,6 @@
4
5
6
-7
8
9
10
@@ -12,9 +11,6 @@
12
13
14
-14½
-15
-16
17
18
19

View File

@@ -0,0 +1,5 @@
-- old --
hello world
-- new --
hello world
-- diff --

View File

@@ -0,0 +1,34 @@
-- old --
e
pi
4
5
6
7
8
9
10
-- new --
1
2
3
4
5
6
7
8
9
10
-- diff --
diff old new
--- old
+++ new
@@ -1,5 +1,6 @@
-e
-pi
+1
+2
+3
4
5
6

40
grammar/internal/diff/testdata/triv.txt vendored Normal file
View File

@@ -0,0 +1,40 @@
Another example from Hunt and McIlroy,
“An Algorithm for Differential File Comparison.”
https://www.cs.dartmouth.edu/~doug/diff.pdf
Anchored diff gives up on finding anything,
since there are no unique lines.
-- old --
a
b
c
a
b
b
a
-- new --
c
a
b
a
b
c
-- diff --
diff old new
--- old
+++ new
@@ -1,7 +1,6 @@
-a
-b
-c
-a
-b
-b
-a
+c
+a
+b
+a
+b
+c

View File

@@ -0,0 +1,171 @@
package jsonschema
import (
"bytes"
"encoding/json"
"errors"
)
// Schema holds a JSON schema.
type Schema struct {
// Name is the name of the property. For the parent/root property, this
// is "root". For child properties, this is the name of the property.
Name string `json:"-"`
// Type is the type of the property.
//
// TODO: Union types (e.g. make this a []string).
Type string
// PrefixItems is a list of schemas for each item in a tuple. By
// default, the tuple is "closed." unless Items is set to true or a
// valid Schema.
PrefixItems []*Schema
// Items is the schema for each item in a list.
//
// If it is missing, or its JSON value is "null" or "false", it is nil.
// If the JSON value is "true", it is set to the empty Schema. If the
// JSON value is an object, it will be decoded as a Schema.
Items *Schema
// MinItems specifies the minimum number of items allowed in a list.
MinItems int
// MaxItems specifies the maximum number of items allowed in a list.
MaxItems int
// Properties is the schema for each property of an object.
Properties []*Schema
// Format is the format of the property. This is used to validate the
// property against a specific format.
//
// It is the callers responsibility to validate the property against
// the format.
Format string
// Minimum specifies the minimum value for numeric properties.
Minimum float64
// Maximum specifies the maximum value for numeric properties.
Maximum float64
// Enum is a list of valid values for the property.
Enum []json.RawMessage
}
func (s *Schema) UnmarshalJSON(data []byte) error {
type S Schema
w := struct {
Properties props
Items items
*S
}{
S: (*S)(s),
}
if err := json.Unmarshal(data, &w); err != nil {
return err
}
if w.Items.set {
s.Items = &w.Items.Schema
}
s.Properties = w.Properties
return nil
}
type items struct {
Schema
set bool
}
func (s *items) UnmarshalJSON(data []byte) error {
switch b := data[0]; b {
case 't':
*s = items{set: true}
case '{':
type I items
if err := json.Unmarshal(data, (*I)(s)); err != nil {
return err
}
s.set = true
case 'n', 'f':
default:
return errors.New("invalid Items")
}
return nil
}
// EffectiveType returns the effective type of the schema. If the Type field is
// not empty, it is returned; otherwise:
//
// - If the schema has both Properties and Items, it returns an empty string.
// - If the schema has Properties, it returns "object".
// - If the schema has Items, it returns "array".
// - If the schema has neither Properties nor Items, it returns "value".
//
// The returned string is never empty.
func (d *Schema) EffectiveType() string {
if d.Type == "" {
if len(d.Properties) > 0 {
return "object"
}
if len(d.PrefixItems) > 0 || d.Items != nil {
return "array"
}
return "value"
}
return d.Type
}
// props is an ordered list of properties. The order of the properties
// is the order in which they were defined in the schema.
type props []*Schema
var _ json.Unmarshaler = (*props)(nil)
func (v *props) UnmarshalJSON(data []byte) error {
if len(data) == 0 {
return nil
}
if data[0] != '{' {
return errors.New("expected object")
}
d := json.NewDecoder(bytes.NewReader(data))
// TODO(bmizerany): Consider DisallowUnknownFields. Currently, we, like
// llama.cpp, ignore unknown fields, which could be lead to unexpected
// behavior for clients of this package, since they may not be aware
// that "additionalFields", "itemsPrefix", etc, are being ignored.
//
// For now, just do what llama.cpp does.
t, err := d.Token()
if err != nil {
return err
}
if t != json.Delim('{') {
return errors.New("expected object")
}
for d.More() {
// Use the first token (map key) as the property name, then
// decode the rest of the object fields into a Schema and
// append.
t, err := d.Token()
if err != nil {
return err
}
if t == json.Delim('}') {
return nil
}
s := &Schema{
Name: t.(string),
}
if err := d.Decode(s); err != nil {
return err
}
*v = append(*v, s)
}
return nil
}

View File

@@ -0,0 +1,104 @@
package jsonschema
import (
"encoding/json"
"reflect"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
)
const testSchemaBasic = `
{
"properties": {
"tupleClosedEmpty": { "prefixItems": [] },
"tupleClosedMissing": { "prefixItems": [{}] },
"tupleClosedNull": { "prefixItems": [{}], "items": null },
"tupleClosedFalse": { "prefixItems": [{}], "items": false },
"tupleOpenTrue": { "prefixItems": [{}], "items": true },
"tupleOpenEmpty": { "prefixItems": [{}], "items": {} },
"tupleOpenTyped": { "prefixItems": [{}], "items": {"type": "boolean"} },
"tupleOpenMax": { "prefixItems": [{}], "items": true, "maxItems": 3},
"array": { "items": {"type": "number"} },
"null": { "type": "null" },
"string": { "type": "string" },
"boolean": { "type": "boolean" }
}
}
`
func TestSchemaUnmarshal(t *testing.T) {
var got *Schema
if err := json.Unmarshal([]byte(testSchemaBasic), &got); err != nil {
t.Fatalf("Unmarshal: %v", err)
}
want := &Schema{
Properties: []*Schema{
{Name: "tupleClosedEmpty", PrefixItems: []*Schema{}, Items: nil},
{Name: "tupleClosedMissing", PrefixItems: []*Schema{{}}, Items: nil},
{Name: "tupleClosedNull", PrefixItems: []*Schema{{}}, Items: nil},
{Name: "tupleClosedFalse", PrefixItems: []*Schema{{}}, Items: nil},
{Name: "tupleOpenTrue", PrefixItems: []*Schema{{}}, Items: &Schema{}},
{Name: "tupleOpenEmpty", PrefixItems: []*Schema{{}}, Items: &Schema{}},
{Name: "tupleOpenTyped", PrefixItems: []*Schema{{}}, Items: &Schema{Type: "boolean"}},
{Name: "tupleOpenMax", PrefixItems: []*Schema{{}}, Items: &Schema{}, MaxItems: 3},
{Name: "array", Items: &Schema{Type: "number"}},
{Name: "null", Type: "null"},
{Name: "string", Type: "string"},
{Name: "boolean", Type: "boolean"},
},
}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("(-want, +got)\n%s", diff)
}
}
func TestEffectiveType(t *testing.T) {
const schema = `
{"properties": {
"o": {"type": "object"},
"a": {"type": "array"},
"n": {"type": "number"},
"s": {"type": "string"},
"z": {"type": "null"},
"b": {"type": "boolean"},
"t0": {"prefixItems": [{}], "items": {"type": "number"}},
"t1": {"items": {"type": "number"}, "maxItems": 3},
"v": {"maxItems": 3}
}}
`
var s *Schema
if err := json.Unmarshal([]byte(schema), &s); err != nil {
t.Fatalf("json.Unmarshal: %v", err)
}
var got []string
for _, p := range s.Properties {
got = append(got, p.EffectiveType())
}
want := strings.Fields(`
object
array
number
string
null
boolean
array
array
value
`)
if !reflect.DeepEqual(want, got) {
t.Errorf("\ngot:\n\t%v\nwant:\n\t%v", got, want)
}
}

76
grammar/testdata/schemas.txt vendored Normal file
View File

@@ -0,0 +1,76 @@
# This file holds tests for JSON schema to EBNF grammar conversions.
#
# The format is a JSON schema, followed by the expected EBNF grammar. Each test
# MAY be preceded by a comment that describes the test (e.g. the test name), followed by
# the JSON schema and the expected EBNF grammar. If no comment is present, the test
# name the tests number in the file (e.g. "#0", "#1", etc.)
#
# Blank lines signify the end or start of a new test. Comments can be added
# anywhere in the file, but they must be preceded by a '#' character and start at
# the beginning of the line.
# default
{}
root ::= value;
{"properties": {}}
root ::= value;
# array
{"properties": {"a": {"type": "array", "items": {"type": "string"}}}}
root_0_tuple_0 ::= string;
root_0 ::= "[" ( root_0_tuple_0 )* "]";
root ::= "{" "a" ":" root_0 "}";
# array with nested array
{"type": "array", "items": {"type": "array", "items": {"type": "string"}}}
root_tuple_0_tuple_0 ::= string;
root_tuple_0 ::= "[" ( root_tuple_0_tuple_0 )* "]";
root ::= "[" ( root_tuple_0 )* "]";
# object
{"properties": {"e": {}}}
root_0 ::= value;
root ::= "{" "e" ":" root_0 "}";
# object with nested object
{"properties": {"o": {"type": "object", "properties": {"e": {}}}}}
root_0_0 ::= value;
root_0 ::= "{" "e" ":" root_0_0 "}";
root ::= "{" "o" ":" root_0 "}";
# boolean
{"type": "boolean"}
root ::= boolean;
# number
{"properties": {"n": {"type": "number", "minimum": 123, "maximum": 4567}}}
root_0 ::= number;
root ::= "{" "n" ":" root_0 "}";
# string
{"type": "string"}
root ::= string;
# string with enum
{"type": "string", "enum": ["a", "b", "c"]}
root ::= ( "\"a\"" "|" "\"b\"" "|" "\"c\"" );
# spaces in key
{"properties": {"a b": {}}}
root_0 ::= value;
root ::= "{" "a b" ":" root_0 "}";
# issue7978
{ "type": "object", "properties": { "steps": { "type": "array", "items": { "type": "object", "properties": { "explanation": { "type": "string" }, "output": { "type": "string" } }, "required": [ "explanation", "output" ], "additionalProperties": false } }, "final_answer": { "type": "string" } }, "required": [ "steps", "final_answer" ], "additionalProperties": false }
root_0_tuple_0_0 ::= string;
root_0_tuple_0_1 ::= string;
root_0_tuple_0 ::= "{" "explanation" ":" root_0_tuple_0_0 "," "output" ":" root_0_tuple_0_1 "}";
root_0 ::= "[" ( root_0_tuple_0 )* "]";
root_1 ::= string;
root ::= "{" "steps" ":" root_0 "," "final_answer" ":" root_1 "}";
# !! # special characters in key
# !! {"properties": {"a!b": {}}}
# !! !invalid character '!' in key
# !!

View File

@@ -37,8 +37,7 @@ go build -tags avx .
```shell
# go doesn't recognize `-mfma` as a valid compiler flag
# see https://github.com/golang/go/issues/17895
go env -w "CGO_CFLAGS_ALLOW=-mfma|-mf16c"
go env -w "CGO_CXXFLAGS_ALLOW=-mfma|-mf16c"
go env -w "CGO_CPPFLAGS_ALLOW=-mfma|-mf16c"
go build -tags=avx,avx2 .
```

34
llama/amx.h vendored
View File

@@ -1,34 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "ggml-backend.h"
#include "ggml-cpu-impl.h"
// GGML internal header
#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void);
#endif

51
llama/ggml-blas.h vendored
View File

@@ -1,51 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
// backend API
GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);
GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);
// number of threads used for conversion to float
// for openblas and blis, this will also set the number of threads used for blas operations
GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);
#ifdef __cplusplus
}
#endif

View File

@@ -1,34 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "ggml-cpu-traits.h"
#include "ggml.h"
// GGML internal header
ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);

View File

@@ -1,64 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "ggml-backend-impl.h"
#include "ggml-cpu-impl.h"
#include "ggml.h"
#ifdef __cplusplus
# include <vector>
extern "C" {
#endif
// return true if op part of extra "accelerator"
bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op);
bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size);
#ifdef __cplusplus
}
namespace ggml::cpu {
// register in tensor->extra
class tensor_traits {
public:
virtual ~tensor_traits();
virtual bool work_size(int n_threads, const struct ggml_tensor * op, size_t & size) = 0;
virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) = 0;
};
class extra_buffer_type {
public:
virtual ~extra_buffer_type();
virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op) = 0;
};
} // namespace ggml::cpu
// implemented in ggml-cpu.cpp.
std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type();
#endif

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_ACC_BLOCK_SIZE 256
void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,60 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "arange.cuh"
static __global__ void arange_f32(float * dst, const int ne0, const float start, const float step) {
// blockIDx.x: idx of ne0 / BLOCK_SIZE
int nidx = threadIdx.x + blockIdx.x * blockDim.x;
if (nidx >= ne0) {
return;
}
dst[nidx] = start + step * nidx;
}
static void arange_f32_cuda(float * dst, const int ne0, const float start, const float step, cudaStream_t stream) {
int num_blocks = (ne0 + CUDA_ARANGE_BLOCK_SIZE - 1) / CUDA_ARANGE_BLOCK_SIZE;
arange_f32<<<num_blocks, CUDA_ARANGE_BLOCK_SIZE, 0, stream>>>(dst, ne0, start, step);
}
void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
float * dst_d = (float *)dst->data;
cudaStream_t stream = ctx.stream();
GGML_ASSERT(dst->type == GGML_TYPE_F32);
float start;
float stop;
float step;
memcpy(&start, (float *)dst->op_params + 0, sizeof(float));
memcpy(&stop, (float *)dst->op_params + 1, sizeof(float));
memcpy(&step, (float *)dst->op_params + 2, sizeof(float));
int64_t steps = (int64_t)ceil((stop - start) / step);
GGML_ASSERT(ggml_nelements(dst) == steps);
arange_f32_cuda(dst_d, dst->ne[0], start, step, stream);
}

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_ARANGE_BLOCK_SIZE 256
void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,29 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_argmax(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,29 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,35 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_repeat_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,60 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "clamp.cuh"
static __global__ void clamp_f32(const float * x, float * dst, const float min, const float max, const int k) {
const int i = blockDim.x*blockIdx.x + threadIdx.x;
if (i >= k) {
return;
}
dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
}
static void clamp_f32_cuda(const float * x, float * dst, const float min, const float max, const int k, cudaStream_t stream) {
const int num_blocks = (k + CUDA_CLAMP_BLOCK_SIZE - 1) / CUDA_CLAMP_BLOCK_SIZE;
clamp_f32<<<num_blocks, CUDA_CLAMP_BLOCK_SIZE, 0, stream>>>(x, dst, min, max, k);
}
void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
const float * src0_d = (const float *)src0->data;
float * dst_d = (float *)dst->data;
cudaStream_t stream = ctx.stream();
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
float min;
float max;
memcpy(&min, dst->op_params, sizeof(float));
memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
clamp_f32_cuda(src0_d, dst_d, min, max, ggml_nelements(src0), stream);
}

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_CLAMP_BLOCK_SIZE 256
void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_CONCAT_BLOCK_SIZE 256
void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_CONV_TRANPOSE_1D_BLOCK_SIZE 256
void ggml_cuda_op_conv_transpose_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,39 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_DEQUANTIZE_BLOCK_SIZE 256
template<typename T>
using to_t_cuda_t = void (*)(const void * __restrict__ x, T * __restrict__ y, int64_t k, cudaStream_t stream);
typedef to_t_cuda_t<float> to_fp32_cuda_t;
typedef to_t_cuda_t<half> to_fp16_cuda_t;
to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type);
to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_COUNT_EQUAL_CHUNK_SIZE 128
void ggml_cuda_count_equal(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,35 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_CPY_BLOCK_SIZE 64
void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1);
void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1);

View File

@@ -1,33 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_CROSS_ENTROPY_LOSS_BLOCK_SIZE 256
void ggml_cuda_cross_entropy_loss(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_cross_entropy_loss_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32
void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,29 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_flash_attn_ext_tile_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,29 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_flash_attn_ext_tile_f32(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,29 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_GET_ROWS_BLOCK_SIZE 256
void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_IM2COL_BLOCK_SIZE 256
void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,38 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
// maximum number of src0 rows with which to use mul_mat_vec over cuBLAS if FP16 tensor cores are available
#define MMV_MAX_ROWS 512
void ggml_cuda_mul_mat_vec(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_cuda_op_mul_mat_vec(
ggml_backend_cuda_context & ctx,
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols,
const int64_t src1_padded_row_size, cudaStream_t stream);

View File

@@ -1,35 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define MMVQ_MAX_BATCH_SIZE 8 // Max. batch size for which to use MMVQ kernels.
void ggml_cuda_op_mul_mat_vec_q(
ggml_backend_cuda_context & ctx,
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols,
const int64_t src1_padded_row_size, cudaStream_t stream);

View File

@@ -1,33 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_op_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_OPT_STEP_ADAMW_BLOCK_SIZE 256
void ggml_cuda_opt_step_adamw(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,29 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,32 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_PAD_BLOCK_SIZE 256
void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_unpad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_POOL2D_BLOCK_SIZE 256
void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,50 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
#include "common.cuh"
#include "mmq.cuh"
#include <cstdint>
#define CUDA_QUANTIZE_BLOCK_SIZE 256
#define CUDA_QUANTIZE_BLOCK_SIZE_MMQ 128
static_assert(MATRIX_ROW_PADDING % CUDA_QUANTIZE_BLOCK_SIZE == 0, "Risk of out-of-bounds access.");
static_assert(MATRIX_ROW_PADDING % (4*CUDA_QUANTIZE_BLOCK_SIZE_MMQ) == 0, "Risk of out-of-bounds access.");
typedef void (*quantize_cuda_t)(
const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
const ggml_type type_x, cudaStream_t stream);
void quantize_row_q8_1_cuda(
const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
const ggml_type type_x, cudaStream_t stream);
void quantize_mmq_q8_1_cuda(
const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
const ggml_type type_x, cudaStream_t stream);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_ROPE_BLOCK_SIZE 256
void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,57 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "scale.cuh"
static __global__ void scale_f32(const float * x, float * dst, const float scale, const int k) {
const int i = blockDim.x*blockIdx.x + threadIdx.x;
if (i >= k) {
return;
}
dst[i] = scale * x[i];
}
static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) {
const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE;
scale_f32<<<num_blocks, CUDA_SCALE_BLOCK_SIZE, 0, stream>>>(x, dst, scale, k);
}
void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
const float * src0_d = (const float *)src0->data;
float * dst_d = (float *)dst->data;
cudaStream_t stream = ctx.stream();
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
float scale;
memcpy(&scale, dst->op_params, sizeof(float));
scale_f32_cuda(src0_d, dst_d, scale, ggml_nelements(src0), stream);
}

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_SCALE_BLOCK_SIZE 256
void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
#define CUDA_SOFT_MAX_BLOCK_SIZE 1024
void ggml_cuda_op_soft_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream);
void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,65 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "sumrows.cuh"
static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) {
const int row = blockIdx.x;
const int col = threadIdx.x;
float sum = 0.0f;
for (int i = col; i < ncols; i += blockDim.x) {
sum += x[row * ncols + i];
}
sum = warp_reduce_sum(sum);
if (col == 0) {
dst[row] = sum;
}
}
void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) {
const dim3 block_dims(WARP_SIZE, 1, 1);
const dim3 block_nums(nrows, 1, 1);
k_sum_rows_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
}
void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
const float * src0_d = (const float *)src0->data;
float * dst_d = (float *)dst->data;
cudaStream_t stream = ctx.stream();
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
GGML_ASSERT(ggml_is_contiguous(src0));
const int64_t ncols = src0->ne[0];
const int64_t nrows = ggml_nrows(src0);
sum_rows_f32_cuda(src0_d, dst_d, ncols, nrows, stream);
}

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "common.cuh"
void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream);
void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0);

View File

@@ -1,31 +0,0 @@
/**
* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
*
* MIT License
*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1);

Some files were not shown because too many files have changed in this diff Show More