mirror of
https://github.com/ollama/ollama.git
synced 2026-01-14 10:28:31 -05:00
Compare commits
19 Commits
mattw/quan
...
v0.1.18
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3a9f447141 | ||
|
|
9c2941e61b | ||
|
|
238ac5e765 | ||
|
|
4f4980b66b | ||
|
|
22e93efa41 | ||
|
|
2909dce894 | ||
|
|
df32537312 | ||
|
|
3367b5f3df | ||
|
|
46edbbc518 | ||
|
|
d2ff18cd6b | ||
|
|
df086d3c8c | ||
|
|
f9961c70ae | ||
|
|
cd8fad3398 | ||
|
|
9983fa5f4e | ||
|
|
dfda91c2ee | ||
|
|
fac9060da5 | ||
|
|
a554616f8e | ||
|
|
77d96da94b | ||
|
|
0d6e3565ae |
@@ -2,7 +2,7 @@
|
||||
ollama
|
||||
app
|
||||
dist
|
||||
llm/llama.cpp/gguf
|
||||
llm/llama.cpp
|
||||
.env
|
||||
.cache
|
||||
test_data
|
||||
9
.gitmodules
vendored
9
.gitmodules
vendored
@@ -1,5 +1,4 @@
|
||||
[submodule "llm/llama.cpp/gguf"]
|
||||
path = llm/llama.cpp/gguf
|
||||
url = https://github.com/ggerganov/llama.cpp.git
|
||||
ignore = dirty
|
||||
shallow = true
|
||||
[submodule "llama.cpp"]
|
||||
path = llm/llama.cpp
|
||||
url = https://github.com/ggerganov/llama.cpp.git
|
||||
shallow = true
|
||||
159
Dockerfile.build
159
Dockerfile.build
@@ -1,74 +1,101 @@
|
||||
# Ubuntu 20.04 amd64 dependencies
|
||||
FROM --platform=linux/amd64 ubuntu:20.04 AS base-amd64
|
||||
ARG CUDA_VERSION=11.3.1-1
|
||||
ARG CMAKE_VERSION=3.22.1
|
||||
# ROCm only supports amd64
|
||||
ARG ROCM_VERSION=6.0
|
||||
ARG CLBLAST_VER=1.6.1
|
||||
|
||||
# Note: https://rocm.docs.amd.com/en/latest/release/user_kernel_space_compat_matrix.html
|
||||
RUN apt-get update && \
|
||||
apt-get install -y wget gnupg && \
|
||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \
|
||||
mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
|
||||
echo "deb [by-hash=no] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" > /etc/apt/sources.list.d/cuda.list && \
|
||||
wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.sh" -O /tmp/cmake-installer.sh && \
|
||||
chmod +x /tmp/cmake-installer.sh && /tmp/cmake-installer.sh --skip-license --prefix=/usr && \
|
||||
mkdir --parents --mode=0755 /etc/apt/keyrings && \
|
||||
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor > /etc/apt/keyrings/rocm.gpg && \
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} focal main" > /etc/apt/sources.list.d/rocm.list && \
|
||||
echo "Package: *" > /etc/apt/preferences.d/rocm-pin-600 && \
|
||||
echo "Pin: release o=repo.radeon.com" >> /etc/apt/preferences.d/rocm-pin-600 && \
|
||||
echo "Pin-Priority: 600" >> /etc/apt/preferences.d/rocm-pin-600 && \
|
||||
apt-get update && \
|
||||
DEBIAN_FRONTEND=noninteractive apt-get -y install cuda=${CUDA_VERSION} rocm-hip-libraries rocm-device-libs rocm-libs rocm-ocl-icd rocm-hip-sdk rocm-hip-libraries rocm-cmake rocm-clang-ocl rocm-dev
|
||||
|
||||
# CLBlast
|
||||
RUN wget -qO- https://github.com/CNugteren/CLBlast/archive/refs/tags/${CLBLAST_VER}.tar.gz | tar zxv -C /tmp/ && \
|
||||
cd /tmp/CLBlast-${CLBLAST_VER} && mkdir build && cd build && cmake .. && make && make install
|
||||
|
||||
ENV ROCM_PATH=/opt/rocm
|
||||
|
||||
# Ubuntu 20.04 arm64 dependencies
|
||||
FROM --platform=linux/arm64 ubuntu:20.04 AS base-arm64
|
||||
ARG CUDA_VERSION=11.3.1-1
|
||||
ARG CMAKE_VERSION=3.27.6
|
||||
RUN apt-get update && \
|
||||
apt-get install -y wget gnupg && \
|
||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa/cuda-ubuntu2004.pin && \
|
||||
mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa//3bf863cc.pub && \
|
||||
echo "deb [by-hash=no] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa/ /" > /etc/apt/sources.list.d/cuda.list && \
|
||||
wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.sh" -O /tmp/cmake-installer.sh && \
|
||||
chmod +x /tmp/cmake-installer.sh && /tmp/cmake-installer.sh --skip-license --prefix=/usr && \
|
||||
apt-get update && \
|
||||
apt-cache madison cuda && \
|
||||
DEBIAN_FRONTEND=noninteractive apt-get -y install cuda=${CUDA_VERSION}
|
||||
|
||||
FROM base-${TARGETARCH}
|
||||
ARG TARGETARCH
|
||||
ARG GOFLAGS="'-ldflags -w -s'"
|
||||
ARG CGO_CFLAGS
|
||||
ARG GOLANG_VERSION=1.21.3
|
||||
ARG CMAKE_VERSION=3.22.1
|
||||
ARG CUDA_VERSION=11.3.1
|
||||
ARG ROCM_VERSION=5.7.1
|
||||
|
||||
# Common toolchain
|
||||
RUN apt-get update && \
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y gcc-10 g++-10 cpp-10 git ocl-icd-opencl-dev && \
|
||||
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10
|
||||
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
|
||||
|
||||
# install go
|
||||
ADD https://dl.google.com/go/go${GOLANG_VERSION}.linux-$TARGETARCH.tar.gz /tmp/go${GOLANG_VERSION}.tar.gz
|
||||
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go${GOLANG_VERSION}.tar.gz
|
||||
ARG CMAKE_VERSION
|
||||
|
||||
RUN yum install -y https://repo.ius.io/ius-release-el7.rpm centos-release-scl \
|
||||
&& yum update -y \
|
||||
&& yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++ git236
|
||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||
|
||||
ADD https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION-linux-x86_64.tar.gz /tmp/cmake-$CMAKE_VERSION.tar.gz
|
||||
RUN tar -zx -C /usr --strip-components 1 </tmp/cmake-$CMAKE_VERSION.tar.gz
|
||||
|
||||
# build the final binary
|
||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||
COPY . .
|
||||
|
||||
ENV GOOS=linux
|
||||
ENV GOARCH=$TARGETARCH
|
||||
ENV GOFLAGS=$GOFLAGS
|
||||
ENV CGO_CFLAGS=${CGO_CFLAGS}
|
||||
WORKDIR llm/generate
|
||||
RUN sh gen_linux.sh
|
||||
|
||||
RUN /usr/local/go/bin/go generate ./... && \
|
||||
/usr/local/go/bin/go build .
|
||||
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
|
||||
|
||||
ARG CMAKE_VERSION
|
||||
|
||||
RUN dnf install -y git cmake
|
||||
|
||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||
COPY . .
|
||||
|
||||
WORKDIR llm/generate
|
||||
RUN sh gen_linux.sh
|
||||
|
||||
FROM --platform=linux/amd64 rocm/dev-centos-7:$ROCM_VERSION-complete AS rocm-build-amd64
|
||||
|
||||
ARG CMAKE_VERSION
|
||||
|
||||
RUN yum install -y https://repo.ius.io/ius-release-el7.rpm centos-release-scl \
|
||||
&& yum update -y \
|
||||
&& yum remove -y git \
|
||||
&& yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++ git236
|
||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||
ENV LIBRARY_PATH /opt/amdgpu/lib64
|
||||
|
||||
ADD https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION-linux-x86_64.tar.gz /tmp/cmake-$CMAKE_VERSION.tar.gz
|
||||
RUN tar -zx -C /usr --strip-components 1 </tmp/cmake-$CMAKE_VERSION.tar.gz
|
||||
|
||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||
COPY . .
|
||||
|
||||
WORKDIR llm/generate
|
||||
RUN sh gen_linux.sh
|
||||
|
||||
FROM --platform=linux/amd64 centos:7 AS build-amd64
|
||||
ENV CGO_ENABLED 1
|
||||
|
||||
ARG GOLANG_VERSION
|
||||
ARG GOFLAGS
|
||||
ARG CGO_FLAGS
|
||||
|
||||
RUN yum install -y centos-release-scl \
|
||||
&& yum update -y \
|
||||
&& yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++
|
||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||
|
||||
ADD https://dl.google.com/go/go$GOLANG_VERSION.linux-amd64.tar.gz /tmp/go-$GOLANG_VERSION.tar.gz
|
||||
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go-$GOLANG_VERSION.tar.gz
|
||||
ENV PATH /usr/local/go/bin:$PATH
|
||||
|
||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||
COPY . .
|
||||
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/cpu/lib llm/llama.cpp/build/linux/cpu/lib
|
||||
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/cuda/lib llm/llama.cpp/build/linux/cuda/lib
|
||||
COPY --from=rocm-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/rocm/lib llm/llama.cpp/build/linux/rocm/lib
|
||||
RUN go build .
|
||||
|
||||
FROM --platform=linux/arm64 centos:7 AS build-arm64
|
||||
ENV CGO_ENABLED 1
|
||||
|
||||
ARG GOLANG_VERSION
|
||||
ARG GOFLAGS
|
||||
ARG CGO_FLAGS
|
||||
|
||||
RUN yum install -y centos-release-scl \
|
||||
&& yum update -y \
|
||||
&& yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++
|
||||
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
|
||||
|
||||
ADD https://dl.google.com/go/go$GOLANG_VERSION.linux-arm64.tar.gz /tmp/go-$GOLANG_VERSION.tar.gz
|
||||
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go-$GOLANG_VERSION.tar.gz
|
||||
ENV PATH /usr/local/go/bin:$PATH
|
||||
|
||||
WORKDIR /go/src/github.com/jmorganca/ollama
|
||||
COPY . .
|
||||
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/cpu/lib llm/llama.cpp/build/linux/cpu/lib
|
||||
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/cuda/lib llm/llama.cpp/build/linux/cuda/lib
|
||||
RUN go build .
|
||||
|
||||
FROM build-$TARGETARCH
|
||||
|
||||
@@ -309,6 +309,13 @@ func (c *Client) Heartbeat(ctx context.Context) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error) {
|
||||
var resp EmbeddingResponse
|
||||
if err := c.do(ctx, http.MethodPost, "/api/embeddings", req, &resp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &resp, nil
|
||||
}
|
||||
|
||||
func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
|
||||
if err := c.do(ctx, http.MethodHead, fmt.Sprintf("/api/blobs/%s", digest), nil, nil); err != nil {
|
||||
|
||||
@@ -148,7 +148,12 @@ type DeleteRequest struct {
|
||||
}
|
||||
|
||||
type ShowRequest struct {
|
||||
Name string `json:"name"`
|
||||
Name string `json:"name"`
|
||||
Model string `json:"model"`
|
||||
System string `json:"system"`
|
||||
Template string `json:"template"`
|
||||
|
||||
Options map[string]interface{} `json:"options"`
|
||||
}
|
||||
|
||||
type ShowResponse struct {
|
||||
|
||||
543
cmd/cmd.go
543
cmd/cmd.go
@@ -17,7 +17,6 @@ import (
|
||||
"os/exec"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
@@ -26,14 +25,12 @@ import (
|
||||
"github.com/olekukonko/tablewriter"
|
||||
"github.com/spf13/cobra"
|
||||
"golang.org/x/crypto/ssh"
|
||||
"golang.org/x/exp/slices"
|
||||
"golang.org/x/term"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
"github.com/jmorganca/ollama/format"
|
||||
"github.com/jmorganca/ollama/parser"
|
||||
"github.com/jmorganca/ollama/progress"
|
||||
"github.com/jmorganca/ollama/readline"
|
||||
"github.com/jmorganca/ollama/server"
|
||||
"github.com/jmorganca/ollama/version"
|
||||
)
|
||||
@@ -152,15 +149,29 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
||||
|
||||
name := args[0]
|
||||
// check if the model exists on the server
|
||||
_, err = client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
||||
model, err := client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
||||
var statusError api.StatusError
|
||||
switch {
|
||||
case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
|
||||
if err := PullHandler(cmd, args); err != nil {
|
||||
if err := PullHandler(cmd, []string{name}); err != nil {
|
||||
return err
|
||||
}
|
||||
case err != nil:
|
||||
return err
|
||||
default:
|
||||
// the model was found, check if it is in the correct format
|
||||
if model.Details.Format != "" && model.Details.Format != "gguf" {
|
||||
// pull and retry to see if the model has been updated
|
||||
parts := strings.Split(name, string(os.PathSeparator))
|
||||
if len(parts) == 1 {
|
||||
// this is a library model, log some info
|
||||
fmt.Fprintln(os.Stderr, "This model is no longer compatible with Ollama. Pulling a new version...")
|
||||
}
|
||||
if err := PullHandler(cmd, []string{name}); err != nil {
|
||||
fmt.Printf("Error: %s\n", err)
|
||||
return fmt.Errorf("unsupported model, please update this model to gguf format") // relay the original error
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return RunGenerate(cmd, args)
|
||||
@@ -572,31 +583,12 @@ func generate(cmd *cobra.Command, opts generateOptions) error {
|
||||
}
|
||||
|
||||
if err := client.Generate(ctx, &request, fn); err != nil {
|
||||
switch {
|
||||
case errors.Is(err, context.Canceled):
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return nil
|
||||
case strings.Contains(err.Error(), "unsupported model format"):
|
||||
// pull and retry to see if the model has been updated
|
||||
parts := strings.Split(opts.Model, string(os.PathSeparator))
|
||||
if len(parts) == 1 {
|
||||
// this is a library model, log some info
|
||||
fmt.Fprintln(os.Stderr, "This model is no longer compatible with Ollama. Pulling a new version...")
|
||||
}
|
||||
if err := PullHandler(cmd, []string{opts.Model}); err != nil {
|
||||
fmt.Printf("Error: %s\n", err)
|
||||
return fmt.Errorf("unsupported model, please update this model to gguf format") // relay the original error
|
||||
}
|
||||
// retry
|
||||
if err := client.Generate(ctx, &request, fn); err != nil {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
default:
|
||||
return err
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
if opts.Prompt != "" {
|
||||
fmt.Println()
|
||||
fmt.Println()
|
||||
@@ -621,459 +613,6 @@ func generate(cmd *cobra.Command, opts generateOptions) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type MultilineState int
|
||||
|
||||
const (
|
||||
MultilineNone MultilineState = iota
|
||||
MultilinePrompt
|
||||
MultilineSystem
|
||||
MultilineTemplate
|
||||
)
|
||||
|
||||
func modelIsMultiModal(cmd *cobra.Command, name string) bool {
|
||||
// get model details
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't connect to ollama server")
|
||||
return false
|
||||
}
|
||||
|
||||
req := api.ShowRequest{Name: name}
|
||||
resp, err := client.Show(cmd.Context(), &req)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return slices.Contains(resp.Details.Families, "clip")
|
||||
}
|
||||
|
||||
func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
multiModal := modelIsMultiModal(cmd, opts.Model)
|
||||
|
||||
// load the model
|
||||
loadOpts := generateOptions{
|
||||
Model: opts.Model,
|
||||
Prompt: "",
|
||||
Images: []ImageData{},
|
||||
}
|
||||
if err := generate(cmd, loadOpts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
usage := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /set Set session variables")
|
||||
fmt.Fprintln(os.Stderr, " /show Show model information")
|
||||
fmt.Fprintln(os.Stderr, " /bye Exit")
|
||||
fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
|
||||
fmt.Fprintln(os.Stderr, " /? shortcuts Help for keyboard shortcuts")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
usageSet := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter ... Set a parameter")
|
||||
fmt.Fprintln(os.Stderr, " /set system <string> Set system message")
|
||||
fmt.Fprintln(os.Stderr, " /set template <string> Set prompt template")
|
||||
fmt.Fprintln(os.Stderr, " /set history Enable history")
|
||||
fmt.Fprintln(os.Stderr, " /set nohistory Disable history")
|
||||
fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap")
|
||||
fmt.Fprintln(os.Stderr, " /set nowordwrap Disable wordwrap")
|
||||
fmt.Fprintln(os.Stderr, " /set format json Enable JSON mode")
|
||||
fmt.Fprintln(os.Stderr, " /set noformat Disable formatting")
|
||||
fmt.Fprintln(os.Stderr, " /set verbose Show LLM stats")
|
||||
fmt.Fprintln(os.Stderr, " /set quiet Disable LLM stats")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
usageShortcuts := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available keyboard shortcuts:")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + a Move to the beginning of the line (Home)")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + e Move to the end of the line (End)")
|
||||
fmt.Fprintln(os.Stderr, " Alt + b Move back (left) one word")
|
||||
fmt.Fprintln(os.Stderr, " Alt + f Move forward (right) one word")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + k Delete the sentence after the cursor")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + u Delete the sentence before the cursor")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + l Clear the screen")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + c Stop the model from responding")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + d Exit ollama (/bye)")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
usageShow := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /show license Show model license")
|
||||
fmt.Fprintln(os.Stderr, " /show modelfile Show Modelfile for this model")
|
||||
fmt.Fprintln(os.Stderr, " /show parameters Show parameters for this model")
|
||||
fmt.Fprintln(os.Stderr, " /show system Show system message")
|
||||
fmt.Fprintln(os.Stderr, " /show template Show prompt template")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
// only list out the most common parameters
|
||||
usageParameters := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Parameters:")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter seed <int> Random number seed")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_predict <int> Max number of tokens to predict")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter top_k <int> Pick from top k num of tokens")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter top_p <float> Pick token based on sum of probabilities")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_ctx <int> Set the context size")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter temperature <float> Set creativity level")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter repeat_penalty <float> How strongly to penalize repetitions")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter repeat_last_n <int> Set how far back to look for repetitions")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_gpu <int> The number of layers to send to the GPU")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter stop \"<string>\", ... Set the stop parameters")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
scanner, err := readline.New(readline.Prompt{
|
||||
Prompt: ">>> ",
|
||||
AltPrompt: "... ",
|
||||
Placeholder: "Send a message (/? for help)",
|
||||
AltPlaceholder: `Use """ to end multi-line input`,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Print(readline.StartBracketedPaste)
|
||||
defer fmt.Printf(readline.EndBracketedPaste)
|
||||
|
||||
var multiline MultilineState
|
||||
var prompt string
|
||||
|
||||
for {
|
||||
line, err := scanner.Readline()
|
||||
switch {
|
||||
case errors.Is(err, io.EOF):
|
||||
fmt.Println()
|
||||
return nil
|
||||
case errors.Is(err, readline.ErrInterrupt):
|
||||
if line == "" {
|
||||
fmt.Println("\nUse Ctrl + d or /bye to exit.")
|
||||
}
|
||||
|
||||
scanner.Prompt.UseAlt = false
|
||||
prompt = ""
|
||||
|
||||
continue
|
||||
case err != nil:
|
||||
return err
|
||||
}
|
||||
|
||||
switch {
|
||||
case strings.HasPrefix(prompt, `"""`):
|
||||
// if the prompt so far starts with """ then we're in multiline mode
|
||||
// and we need to keep reading until we find a line that ends with """
|
||||
cut, found := strings.CutSuffix(line, `"""`)
|
||||
prompt += cut
|
||||
|
||||
if !found {
|
||||
prompt += "\n"
|
||||
continue
|
||||
}
|
||||
|
||||
prompt = strings.TrimPrefix(prompt, `"""`)
|
||||
scanner.Prompt.UseAlt = false
|
||||
|
||||
switch multiline {
|
||||
case MultilineSystem:
|
||||
opts.System = prompt
|
||||
prompt = ""
|
||||
fmt.Println("Set system message.")
|
||||
case MultilineTemplate:
|
||||
opts.Template = prompt
|
||||
prompt = ""
|
||||
fmt.Println("Set prompt template.")
|
||||
}
|
||||
multiline = MultilineNone
|
||||
case strings.HasPrefix(line, `"""`) && len(prompt) == 0:
|
||||
scanner.Prompt.UseAlt = true
|
||||
multiline = MultilinePrompt
|
||||
prompt += line + "\n"
|
||||
continue
|
||||
case scanner.Pasting:
|
||||
prompt += line + "\n"
|
||||
continue
|
||||
case strings.HasPrefix(line, "/list"):
|
||||
args := strings.Fields(line)
|
||||
if err := ListHandler(cmd, args[1:]); err != nil {
|
||||
return err
|
||||
}
|
||||
case strings.HasPrefix(line, "/set"):
|
||||
args := strings.Fields(line)
|
||||
if len(args) > 1 {
|
||||
switch args[1] {
|
||||
case "history":
|
||||
scanner.HistoryEnable()
|
||||
case "nohistory":
|
||||
scanner.HistoryDisable()
|
||||
case "wordwrap":
|
||||
opts.WordWrap = true
|
||||
fmt.Println("Set 'wordwrap' mode.")
|
||||
case "nowordwrap":
|
||||
opts.WordWrap = false
|
||||
fmt.Println("Set 'nowordwrap' mode.")
|
||||
case "verbose":
|
||||
cmd.Flags().Set("verbose", "true")
|
||||
fmt.Println("Set 'verbose' mode.")
|
||||
case "quiet":
|
||||
cmd.Flags().Set("verbose", "false")
|
||||
fmt.Println("Set 'quiet' mode.")
|
||||
case "format":
|
||||
if len(args) < 3 || args[2] != "json" {
|
||||
fmt.Println("Invalid or missing format. For 'json' mode use '/set format json'")
|
||||
} else {
|
||||
opts.Format = args[2]
|
||||
fmt.Printf("Set format to '%s' mode.\n", args[2])
|
||||
}
|
||||
case "noformat":
|
||||
opts.Format = ""
|
||||
fmt.Println("Disabled format.")
|
||||
case "parameter":
|
||||
if len(args) < 4 {
|
||||
usageParameters()
|
||||
continue
|
||||
}
|
||||
var params []string
|
||||
for _, p := range args[3:] {
|
||||
params = append(params, p)
|
||||
}
|
||||
fp, err := api.FormatParams(map[string][]string{args[2]: params})
|
||||
if err != nil {
|
||||
fmt.Printf("Couldn't set parameter: %q\n\n", err)
|
||||
continue
|
||||
}
|
||||
fmt.Printf("Set parameter '%s' to '%s'\n\n", args[2], strings.Join(params, ", "))
|
||||
opts.Options[args[2]] = fp[args[2]]
|
||||
case "system", "template":
|
||||
if len(args) < 3 {
|
||||
usageSet()
|
||||
continue
|
||||
}
|
||||
line := strings.Join(args[2:], " ")
|
||||
line = strings.TrimPrefix(line, `"""`)
|
||||
if strings.HasPrefix(args[2], `"""`) {
|
||||
cut, found := strings.CutSuffix(line, `"""`)
|
||||
prompt += cut
|
||||
if found {
|
||||
if args[1] == "system" {
|
||||
opts.System = prompt
|
||||
fmt.Println("Set system message.")
|
||||
} else {
|
||||
opts.Template = prompt
|
||||
fmt.Println("Set prompt template.")
|
||||
}
|
||||
prompt = ""
|
||||
} else {
|
||||
prompt = `"""` + prompt + "\n"
|
||||
if args[1] == "system" {
|
||||
multiline = MultilineSystem
|
||||
} else {
|
||||
multiline = MultilineTemplate
|
||||
}
|
||||
scanner.Prompt.UseAlt = true
|
||||
}
|
||||
} else {
|
||||
opts.System = line
|
||||
fmt.Println("Set system message.")
|
||||
}
|
||||
default:
|
||||
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
|
||||
}
|
||||
} else {
|
||||
usageSet()
|
||||
}
|
||||
case strings.HasPrefix(line, "/show"):
|
||||
args := strings.Fields(line)
|
||||
if len(args) > 1 {
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't connect to ollama server")
|
||||
return err
|
||||
}
|
||||
resp, err := client.Show(cmd.Context(), &api.ShowRequest{Name: opts.Model})
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't get model")
|
||||
return err
|
||||
}
|
||||
|
||||
switch args[1] {
|
||||
case "license":
|
||||
if resp.License == "" {
|
||||
fmt.Print("No license was specified for this model.\n\n")
|
||||
} else {
|
||||
fmt.Println(resp.License)
|
||||
}
|
||||
case "modelfile":
|
||||
fmt.Println(resp.Modelfile)
|
||||
case "parameters":
|
||||
if resp.Parameters == "" {
|
||||
fmt.Print("No parameters were specified for this model.\n\n")
|
||||
} else {
|
||||
if len(opts.Options) > 0 {
|
||||
fmt.Println("User defined parameters:")
|
||||
for k, v := range opts.Options {
|
||||
fmt.Printf("%-*s %v\n", 30, k, v)
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
fmt.Println("Model defined parameters:")
|
||||
fmt.Println(resp.Parameters)
|
||||
}
|
||||
case "system":
|
||||
switch {
|
||||
case opts.System != "":
|
||||
fmt.Println(opts.System + "\n")
|
||||
case resp.System != "":
|
||||
fmt.Println(resp.System + "\n")
|
||||
default:
|
||||
fmt.Print("No system message was specified for this model.\n\n")
|
||||
}
|
||||
case "template":
|
||||
switch {
|
||||
case opts.Template != "":
|
||||
fmt.Println(opts.Template + "\n")
|
||||
case resp.Template != "":
|
||||
fmt.Println(resp.Template)
|
||||
default:
|
||||
fmt.Print("No prompt template was specified for this model.\n\n")
|
||||
}
|
||||
default:
|
||||
fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
|
||||
}
|
||||
} else {
|
||||
usageShow()
|
||||
}
|
||||
case strings.HasPrefix(line, "/help"), strings.HasPrefix(line, "/?"):
|
||||
args := strings.Fields(line)
|
||||
if len(args) > 1 {
|
||||
switch args[1] {
|
||||
case "set", "/set":
|
||||
usageSet()
|
||||
case "show", "/show":
|
||||
usageShow()
|
||||
case "shortcut", "shortcuts":
|
||||
usageShortcuts()
|
||||
}
|
||||
} else {
|
||||
usage()
|
||||
}
|
||||
case line == "/exit", line == "/bye":
|
||||
return nil
|
||||
case strings.HasPrefix(line, "/"):
|
||||
args := strings.Fields(line)
|
||||
isFile := false
|
||||
|
||||
if multiModal {
|
||||
for _, f := range extractFileNames(line) {
|
||||
if strings.HasPrefix(f, args[0]) {
|
||||
isFile = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if isFile {
|
||||
prompt += line
|
||||
} else {
|
||||
fmt.Printf("Unknown command '%s'. Type /? for help\n", args[0])
|
||||
continue
|
||||
}
|
||||
default:
|
||||
prompt += line
|
||||
}
|
||||
|
||||
if len(prompt) > 0 && multiline == MultilineNone {
|
||||
opts.Prompt = prompt
|
||||
if multiModal {
|
||||
newPrompt, images, err := extractFileData(prompt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts.Prompt = newPrompt
|
||||
|
||||
// reset the context if we find another image
|
||||
if len(images) > 0 {
|
||||
opts.Images = images
|
||||
ctx := cmd.Context()
|
||||
ctx = context.WithValue(ctx, generateContextKey("context"), []int{})
|
||||
cmd.SetContext(ctx)
|
||||
}
|
||||
if len(opts.Images) == 0 {
|
||||
fmt.Println("This model requires you to add a jpeg, png, or svg image.")
|
||||
fmt.Println()
|
||||
prompt = ""
|
||||
continue
|
||||
}
|
||||
}
|
||||
if err := generate(cmd, opts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
prompt = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// filePathUnescaper maps each backslash-escaped character handled by
// normalizeFilePath to its literal form. A strings.Replacer scans the input
// once, left to right, so text produced by one replacement is never
// re-examined by another.
var filePathUnescaper = strings.NewReplacer(
	"\\ ", " ", // Escaped space
	"\\(", "(", // Escaped left parenthesis
	"\\)", ")", // Escaped right parenthesis
	"\\[", "[", // Escaped left square bracket
	"\\]", "]", // Escaped right square bracket
	"\\{", "{", // Escaped left curly brace
	"\\}", "}", // Escaped right curly brace
	"\\$", "$", // Escaped dollar sign
	"\\&", "&", // Escaped ampersand
	"\\;", ";", // Escaped semicolon
	"\\'", "'", // Escaped single quote
	"\\\\", "\\", // Escaped backslash
	"\\*", "*", // Escaped asterisk
	"\\?", "?", // Escaped question mark
)

// normalizeFilePath returns fp with shell-style backslash escapes (such as
// `\ `, `\(` or `\\`) replaced by the characters they stand for.
//
// The replacement is done in a single pass. Applying the substitutions one
// after another in map-iteration order made the result depend on the random
// order: an escaped backslash followed by an escapable character (`\\(`)
// could be unescaped twice on some runs and once on others.
func normalizeFilePath(fp string) string {
	return filePathUnescaper.Replace(fp)
}
|
||||
|
||||
// imageFilePathRegexp matches a path that starts with "/" or "./", continues
// with non-whitespace characters, backslashes or spaces, and ends in a
// case-insensitive jpg, jpeg, png or svg extension. It is compiled once at
// package scope instead of on every call.
var imageFilePathRegexp = regexp.MustCompile(`(?:\./|/)[\S\\ ]+?\.(?i:jpg|jpeg|png|svg)\b`)

// extractFileNames returns every substring of input that looks like an image
// file path, in order of appearance. The paths are returned exactly as
// written, including any backslash escapes. It returns nil when nothing
// matches.
func extractFileNames(input string) []string {
	return imageFilePathRegexp.FindAllString(input, -1)
}
|
||||
|
||||
func extractFileData(input string) (string, []ImageData, error) {
|
||||
filePaths := extractFileNames(input)
|
||||
var imgs []ImageData
|
||||
|
||||
for _, fp := range filePaths {
|
||||
nfp := normalizeFilePath(fp)
|
||||
data, err := getImageData(nfp)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("Couldn't process image: %q\n", err)
|
||||
return "", imgs, err
|
||||
}
|
||||
fmt.Printf("Added image '%s'\n", nfp)
|
||||
input = strings.ReplaceAll(input, fp, "")
|
||||
imgs = append(imgs, data)
|
||||
}
|
||||
return input, imgs, nil
|
||||
}
|
||||
|
||||
func RunServer(cmd *cobra.Command, _ []string) error {
|
||||
host, port, err := net.SplitHostPort(os.Getenv("OLLAMA_HOST"))
|
||||
if err != nil {
|
||||
@@ -1095,50 +634,6 @@ func RunServer(cmd *cobra.Command, _ []string) error {
|
||||
return server.Serve(ln)
|
||||
}
|
||||
|
||||
// getImageData reads the file at filePath and returns its full contents if it
// is an accepted image no larger than 100MB.
//
// The type is sniffed with http.DetectContentType from the first bytes of the
// file (up to 512). Only the bytes actually read are passed to the detector,
// so files shorter than 512 bytes are not classified from zero padding, and an
// empty file is reported as an invalid image type rather than as a bare
// io.EOF.
//
// Errors from os.Open are returned unwrapped because extractFileData tests
// them with os.IsNotExist, which does not look through wrapped errors.
func getImageData(filePath string) ([]byte, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	header := make([]byte, 512)
	n, err := file.Read(header)
	if err != nil && !errors.Is(err, io.EOF) {
		return nil, err
	}

	contentType := http.DetectContentType(header[:n])
	switch contentType {
	case "image/jpeg", "image/jpg", "image/svg+xml", "image/png":
		// accepted
	default:
		return nil, fmt.Errorf("invalid image type: %s", contentType)
	}

	info, err := file.Stat()
	if err != nil {
		return nil, err
	}

	// Check if the file size exceeds 100MB
	const maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
	if info.Size() > maxSize {
		return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
	}

	// Rewind past the sniffed header and read the whole file.
	buf := make([]byte, info.Size())
	if _, err := file.Seek(0, io.SeekStart); err != nil {
		return nil, err
	}
	if _, err := io.ReadFull(file, buf); err != nil {
		return nil, err
	}

	return buf, nil
}
|
||||
|
||||
func initializeKeypair() error {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
|
||||
532
cmd/interactive.go
Normal file
532
cmd/interactive.go
Normal file
@@ -0,0 +1,532 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
"github.com/jmorganca/ollama/readline"
|
||||
)
|
||||
|
||||
type MultilineState int
|
||||
|
||||
const (
|
||||
MultilineNone MultilineState = iota
|
||||
MultilinePrompt
|
||||
MultilineSystem
|
||||
MultilineTemplate
|
||||
)
|
||||
|
||||
func modelIsMultiModal(cmd *cobra.Command, name string) bool {
|
||||
// get model details
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't connect to ollama server")
|
||||
return false
|
||||
}
|
||||
|
||||
req := api.ShowRequest{Name: name}
|
||||
resp, err := client.Show(cmd.Context(), &req)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return slices.Contains(resp.Details.Families, "clip")
|
||||
}
|
||||
|
||||
func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
multiModal := modelIsMultiModal(cmd, opts.Model)
|
||||
|
||||
// load the model
|
||||
loadOpts := generateOptions{
|
||||
Model: opts.Model,
|
||||
Prompt: "",
|
||||
Images: []ImageData{},
|
||||
}
|
||||
if err := generate(cmd, loadOpts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
usage := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /set Set session variables")
|
||||
fmt.Fprintln(os.Stderr, " /show Show model information")
|
||||
fmt.Fprintln(os.Stderr, " /bye Exit")
|
||||
fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
|
||||
fmt.Fprintln(os.Stderr, " /? shortcuts Help for keyboard shortcuts")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
usageSet := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter ... Set a parameter")
|
||||
fmt.Fprintln(os.Stderr, " /set system <string> Set system message")
|
||||
fmt.Fprintln(os.Stderr, " /set template <string> Set prompt template")
|
||||
fmt.Fprintln(os.Stderr, " /set history Enable history")
|
||||
fmt.Fprintln(os.Stderr, " /set nohistory Disable history")
|
||||
fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap")
|
||||
fmt.Fprintln(os.Stderr, " /set nowordwrap Disable wordwrap")
|
||||
fmt.Fprintln(os.Stderr, " /set format json Enable JSON mode")
|
||||
fmt.Fprintln(os.Stderr, " /set noformat Disable formatting")
|
||||
fmt.Fprintln(os.Stderr, " /set verbose Show LLM stats")
|
||||
fmt.Fprintln(os.Stderr, " /set quiet Disable LLM stats")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
usageShortcuts := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available keyboard shortcuts:")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + a Move to the beginning of the line (Home)")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + e Move to the end of the line (End)")
|
||||
fmt.Fprintln(os.Stderr, " Alt + b Move back (left) one word")
|
||||
fmt.Fprintln(os.Stderr, " Alt + f Move forward (right) one word")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + k Delete the sentence after the cursor")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + u Delete the sentence before the cursor")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + l Clear the screen")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + c Stop the model from responding")
|
||||
fmt.Fprintln(os.Stderr, " Ctrl + d Exit ollama (/bye)")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
usageShow := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /show info Show details for this model")
|
||||
fmt.Fprintln(os.Stderr, " /show license Show model license")
|
||||
fmt.Fprintln(os.Stderr, " /show modelfile Show Modelfile for this model")
|
||||
fmt.Fprintln(os.Stderr, " /show parameters Show parameters for this model")
|
||||
fmt.Fprintln(os.Stderr, " /show system Show system message")
|
||||
fmt.Fprintln(os.Stderr, " /show template Show prompt template")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
// only list out the most common parameters
|
||||
usageParameters := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Parameters:")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter seed <int> Random number seed")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_predict <int> Max number of tokens to predict")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter top_k <int> Pick from top k num of tokens")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter top_p <float> Pick token based on sum of probabilities")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_ctx <int> Set the context size")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter temperature <float> Set creativity level")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter repeat_penalty <float> How strongly to penalize repetitions")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter repeat_last_n <int> Set how far back to look for repetitions")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_gpu <int> The number of layers to send to the GPU")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter stop \"<string>\", ... Set the stop parameters")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
scanner, err := readline.New(readline.Prompt{
|
||||
Prompt: ">>> ",
|
||||
AltPrompt: "... ",
|
||||
Placeholder: "Send a message (/? for help)",
|
||||
AltPlaceholder: `Use """ to end multi-line input`,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Print(readline.StartBracketedPaste)
|
||||
defer fmt.Printf(readline.EndBracketedPaste)
|
||||
|
||||
var multiline MultilineState
|
||||
var prompt string
|
||||
|
||||
for {
|
||||
line, err := scanner.Readline()
|
||||
switch {
|
||||
case errors.Is(err, io.EOF):
|
||||
fmt.Println()
|
||||
return nil
|
||||
case errors.Is(err, readline.ErrInterrupt):
|
||||
if line == "" {
|
||||
fmt.Println("\nUse Ctrl + d or /bye to exit.")
|
||||
}
|
||||
|
||||
scanner.Prompt.UseAlt = false
|
||||
prompt = ""
|
||||
|
||||
continue
|
||||
case err != nil:
|
||||
return err
|
||||
}
|
||||
|
||||
switch {
|
||||
case strings.HasPrefix(prompt, `"""`):
|
||||
// if the prompt so far starts with """ then we're in multiline mode
|
||||
// and we need to keep reading until we find a line that ends with """
|
||||
cut, found := strings.CutSuffix(line, `"""`)
|
||||
prompt += cut
|
||||
|
||||
if !found {
|
||||
prompt += "\n"
|
||||
continue
|
||||
}
|
||||
|
||||
prompt = strings.TrimPrefix(prompt, `"""`)
|
||||
scanner.Prompt.UseAlt = false
|
||||
|
||||
switch multiline {
|
||||
case MultilineSystem:
|
||||
opts.System = prompt
|
||||
prompt = ""
|
||||
fmt.Println("Set system message.")
|
||||
case MultilineTemplate:
|
||||
opts.Template = prompt
|
||||
prompt = ""
|
||||
fmt.Println("Set prompt template.")
|
||||
}
|
||||
multiline = MultilineNone
|
||||
case strings.HasPrefix(line, `"""`) && len(prompt) == 0:
|
||||
scanner.Prompt.UseAlt = true
|
||||
multiline = MultilinePrompt
|
||||
prompt += line + "\n"
|
||||
continue
|
||||
case scanner.Pasting:
|
||||
prompt += line + "\n"
|
||||
continue
|
||||
case strings.HasPrefix(line, "/list"):
|
||||
args := strings.Fields(line)
|
||||
if err := ListHandler(cmd, args[1:]); err != nil {
|
||||
return err
|
||||
}
|
||||
case strings.HasPrefix(line, "/set"):
|
||||
args := strings.Fields(line)
|
||||
if len(args) > 1 {
|
||||
switch args[1] {
|
||||
case "history":
|
||||
scanner.HistoryEnable()
|
||||
case "nohistory":
|
||||
scanner.HistoryDisable()
|
||||
case "wordwrap":
|
||||
opts.WordWrap = true
|
||||
fmt.Println("Set 'wordwrap' mode.")
|
||||
case "nowordwrap":
|
||||
opts.WordWrap = false
|
||||
fmt.Println("Set 'nowordwrap' mode.")
|
||||
case "verbose":
|
||||
cmd.Flags().Set("verbose", "true")
|
||||
fmt.Println("Set 'verbose' mode.")
|
||||
case "quiet":
|
||||
cmd.Flags().Set("verbose", "false")
|
||||
fmt.Println("Set 'quiet' mode.")
|
||||
case "format":
|
||||
if len(args) < 3 || args[2] != "json" {
|
||||
fmt.Println("Invalid or missing format. For 'json' mode use '/set format json'")
|
||||
} else {
|
||||
opts.Format = args[2]
|
||||
fmt.Printf("Set format to '%s' mode.\n", args[2])
|
||||
}
|
||||
case "noformat":
|
||||
opts.Format = ""
|
||||
fmt.Println("Disabled format.")
|
||||
case "parameter":
|
||||
if len(args) < 4 {
|
||||
usageParameters()
|
||||
continue
|
||||
}
|
||||
var params []string
|
||||
for _, p := range args[3:] {
|
||||
params = append(params, p)
|
||||
}
|
||||
fp, err := api.FormatParams(map[string][]string{args[2]: params})
|
||||
if err != nil {
|
||||
fmt.Printf("Couldn't set parameter: %q\n\n", err)
|
||||
continue
|
||||
}
|
||||
fmt.Printf("Set parameter '%s' to '%s'\n\n", args[2], strings.Join(params, ", "))
|
||||
opts.Options[args[2]] = fp[args[2]]
|
||||
case "system", "template":
|
||||
if len(args) < 3 {
|
||||
usageSet()
|
||||
continue
|
||||
}
|
||||
line := strings.Join(args[2:], " ")
|
||||
line = strings.TrimPrefix(line, `"""`)
|
||||
if strings.HasPrefix(args[2], `"""`) {
|
||||
cut, found := strings.CutSuffix(line, `"""`)
|
||||
prompt += cut
|
||||
if found {
|
||||
if args[1] == "system" {
|
||||
opts.System = prompt
|
||||
fmt.Println("Set system message.")
|
||||
} else {
|
||||
opts.Template = prompt
|
||||
fmt.Println("Set prompt template.")
|
||||
}
|
||||
prompt = ""
|
||||
} else {
|
||||
prompt = `"""` + prompt + "\n"
|
||||
if args[1] == "system" {
|
||||
multiline = MultilineSystem
|
||||
} else {
|
||||
multiline = MultilineTemplate
|
||||
}
|
||||
scanner.Prompt.UseAlt = true
|
||||
}
|
||||
} else {
|
||||
opts.System = line
|
||||
fmt.Println("Set system message.")
|
||||
}
|
||||
default:
|
||||
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
|
||||
}
|
||||
} else {
|
||||
usageSet()
|
||||
}
|
||||
case strings.HasPrefix(line, "/show"):
|
||||
args := strings.Fields(line)
|
||||
if len(args) > 1 {
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't connect to ollama server")
|
||||
return err
|
||||
}
|
||||
req := &api.ShowRequest{
|
||||
Name: opts.Model,
|
||||
System: opts.System,
|
||||
Template: opts.Template,
|
||||
Options: opts.Options,
|
||||
}
|
||||
resp, err := client.Show(cmd.Context(), req)
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't get model")
|
||||
return err
|
||||
}
|
||||
|
||||
switch args[1] {
|
||||
case "info":
|
||||
fmt.Println("Model details:")
|
||||
if len(resp.Details.Families) > 0 {
|
||||
fmt.Printf("Family %s\n", strings.Join(resp.Details.Families, ", "))
|
||||
} else if resp.Details.Family != "" {
|
||||
fmt.Printf("Family %s\n", resp.Details.Family)
|
||||
}
|
||||
fmt.Printf("Parameter Size %s\n", resp.Details.ParameterSize)
|
||||
fmt.Printf("Quantization Level %s\n", resp.Details.QuantizationLevel)
|
||||
fmt.Println("")
|
||||
case "license":
|
||||
if resp.License == "" {
|
||||
fmt.Print("No license was specified for this model.\n\n")
|
||||
} else {
|
||||
fmt.Println(resp.License)
|
||||
}
|
||||
case "modelfile":
|
||||
fmt.Println(resp.Modelfile)
|
||||
case "parameters":
|
||||
if resp.Parameters == "" {
|
||||
fmt.Print("No parameters were specified for this model.\n\n")
|
||||
} else {
|
||||
if len(opts.Options) > 0 {
|
||||
fmt.Println("User defined parameters:")
|
||||
for k, v := range opts.Options {
|
||||
fmt.Printf("%-*s %v\n", 30, k, v)
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
fmt.Println("Model defined parameters:")
|
||||
fmt.Println(resp.Parameters)
|
||||
}
|
||||
case "system":
|
||||
switch {
|
||||
case opts.System != "":
|
||||
fmt.Println(opts.System + "\n")
|
||||
case resp.System != "":
|
||||
fmt.Println(resp.System + "\n")
|
||||
default:
|
||||
fmt.Print("No system message was specified for this model.\n\n")
|
||||
}
|
||||
case "template":
|
||||
switch {
|
||||
case opts.Template != "":
|
||||
fmt.Println(opts.Template + "\n")
|
||||
case resp.Template != "":
|
||||
fmt.Println(resp.Template)
|
||||
default:
|
||||
fmt.Print("No prompt template was specified for this model.\n\n")
|
||||
}
|
||||
default:
|
||||
fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
|
||||
}
|
||||
} else {
|
||||
usageShow()
|
||||
}
|
||||
case strings.HasPrefix(line, "/help"), strings.HasPrefix(line, "/?"):
|
||||
args := strings.Fields(line)
|
||||
if len(args) > 1 {
|
||||
switch args[1] {
|
||||
case "set", "/set":
|
||||
usageSet()
|
||||
case "show", "/show":
|
||||
usageShow()
|
||||
case "shortcut", "shortcuts":
|
||||
usageShortcuts()
|
||||
}
|
||||
} else {
|
||||
usage()
|
||||
}
|
||||
case line == "/exit", line == "/bye":
|
||||
return nil
|
||||
case strings.HasPrefix(line, "/"):
|
||||
args := strings.Fields(line)
|
||||
isFile := false
|
||||
|
||||
if multiModal {
|
||||
for _, f := range extractFileNames(line) {
|
||||
if strings.HasPrefix(f, args[0]) {
|
||||
isFile = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if isFile {
|
||||
prompt += line
|
||||
} else {
|
||||
fmt.Printf("Unknown command '%s'. Type /? for help\n", args[0])
|
||||
continue
|
||||
}
|
||||
default:
|
||||
prompt += line
|
||||
}
|
||||
|
||||
if len(prompt) > 0 && multiline == MultilineNone {
|
||||
opts.Prompt = prompt
|
||||
if multiModal {
|
||||
newPrompt, images, err := extractFileData(prompt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts.Prompt = newPrompt
|
||||
|
||||
// reset the context if we find another image
|
||||
if len(images) > 0 {
|
||||
opts.Images = images
|
||||
ctx := cmd.Context()
|
||||
ctx = context.WithValue(ctx, generateContextKey("context"), []int{})
|
||||
cmd.SetContext(ctx)
|
||||
}
|
||||
if len(opts.Images) == 0 {
|
||||
fmt.Println("This model requires you to add a jpeg, png, or svg image.")
|
||||
fmt.Println()
|
||||
prompt = ""
|
||||
continue
|
||||
}
|
||||
}
|
||||
if err := generate(cmd, opts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
prompt = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// normalizeFilePath removes shell-style backslash escapes from fp, turning
// sequences such as `\ ` or `\(` into the literal character they stand for.
//
// The replacements are applied in a single left-to-right pass, so text produced
// by one replacement is never re-scanned by another. This keeps the result
// deterministic: an escaped backslash followed by a space (`\\ `) always becomes
// a backslash followed by a space.
func normalizeFilePath(fp string) string {
	return strings.NewReplacer(
		"\\ ", " ", // Escaped space
		"\\(", "(", // Escaped left parenthesis
		"\\)", ")", // Escaped right parenthesis
		"\\[", "[", // Escaped left square bracket
		"\\]", "]", // Escaped right square bracket
		"\\{", "{", // Escaped left curly brace
		"\\}", "}", // Escaped right curly brace
		"\\$", "$", // Escaped dollar sign
		"\\&", "&", // Escaped ampersand
		"\\;", ";", // Escaped semicolon
		"\\'", "'", // Escaped single quote
		"\\\\", "\\", // Escaped backslash
		"\\*", "*", // Escaped asterisk
		"\\?", "?", // Escaped question mark
	).Replace(fp)
}
|
||||
|
||||
// imageFilePathPattern matches file paths that start with "/" or "./", continue
// with non-whitespace characters, backslashes or spaces, and end in a jpg,
// jpeg, png or svg extension (case-insensitive) at a word boundary.
// It is compiled once at package scope instead of on every call.
var imageFilePathPattern = regexp.MustCompile(`(?:\./|/)[\S\\ ]+?\.(?i:jpg|jpeg|png|svg)\b`)

// extractFileNames returns every substring of input that looks like an image
// file path according to imageFilePathPattern, in order of appearance. It
// returns nil when nothing matches.
func extractFileNames(input string) []string {
	return imageFilePathPattern.FindAllString(input, -1)
}
|
||||
|
||||
func extractFileData(input string) (string, []ImageData, error) {
|
||||
filePaths := extractFileNames(input)
|
||||
var imgs []ImageData
|
||||
|
||||
for _, fp := range filePaths {
|
||||
nfp := normalizeFilePath(fp)
|
||||
data, err := getImageData(nfp)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("Couldn't process image: %q\n", err)
|
||||
return "", imgs, err
|
||||
}
|
||||
fmt.Printf("Added image '%s'\n", nfp)
|
||||
input = strings.ReplaceAll(input, fp, "")
|
||||
imgs = append(imgs, data)
|
||||
}
|
||||
return input, imgs, nil
|
||||
}
|
||||
|
||||
// getImageData reads the image file at filePath and returns its full contents.
//
// The content type is sniffed from at most the first 512 bytes and must be one
// of the allowed image types; files larger than 100MB are rejected. Errors from
// opening, reading, stat-ing or seeking the file are returned unchanged, so
// callers can still test them with os.IsNotExist.
func getImageData(filePath string) ([]byte, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	header := make([]byte, 512)
	n, err := file.Read(header)
	if err != nil {
		return nil, err
	}

	// Sniff only the bytes actually read, so the zero padding of a short file
	// does not take part in content detection.
	contentType := http.DetectContentType(header[:n])
	switch contentType {
	case "image/jpeg", "image/jpg", "image/svg+xml", "image/png":
		// allowed
		// NOTE(review): http.DetectContentType does not appear to ever report
		// "image/jpg" or "image/svg+xml" — confirm whether SVG input can pass.
	default:
		return nil, fmt.Errorf("invalid image type: %s", contentType)
	}

	info, err := file.Stat()
	if err != nil {
		return nil, err
	}

	// Check if the file size exceeds 100MB
	const maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
	if info.Size() > maxSize {
		return nil, fmt.Errorf("file size exceeds maximum limit (100MB)")
	}

	// Rewind past the sniffed header and read the whole file.
	buf := make([]byte, info.Size())
	if _, err := file.Seek(0, io.SeekStart); err != nil {
		return nil, err
	}
	if _, err := io.ReadFull(file, buf); err != nil {
		return nil, err
	}

	return buf, nil
}
|
||||
@@ -12,7 +12,7 @@ Import models using source model weights found on Hugging Face and similar sites
|
||||
|
||||
Installing on Linux in most cases is easy using the script on Ollama.ai. To get more detail about the install, including CUDA drivers, see the **[Linux Documentation](./linux.md)**.
|
||||
|
||||
Many of our users like the flexibility of using our official Docker Image. Learn more about using Docker with Ollama using the **[Docker Documentation](./docker.md)**.
|
||||
Many of our users like the flexibility of using our official Docker Image. Learn more about using Docker with Ollama using the **[Docker Documentation](https://hub.docker.com/r/ollama/ollama)**.
|
||||
|
||||
It is easy to install on Linux and Mac, but many users will choose to build Ollama on their own. To do this, refer to the **[Development Documentation](./development.md)**.
|
||||
|
||||
|
||||
@@ -20,6 +20,8 @@ const char *cuda_lib_paths[] = {
|
||||
};
|
||||
#endif
|
||||
|
||||
#define CUDA_LOOKUP_SIZE 5
|
||||
|
||||
void cuda_init(cuda_init_resp_t *resp) {
|
||||
nvmlReturn_t ret;
|
||||
resp->err = NULL;
|
||||
@@ -30,11 +32,12 @@ void cuda_init(cuda_init_resp_t *resp) {
|
||||
struct lookup {
|
||||
char *s;
|
||||
void **p;
|
||||
} l[4] = {
|
||||
} l[CUDA_LOOKUP_SIZE] = {
|
||||
{"nvmlInit_v2", (void *)&resp->ch.initFn},
|
||||
{"nvmlShutdown", (void *)&resp->ch.shutdownFn},
|
||||
{"nvmlDeviceGetHandleByIndex", (void *)&resp->ch.getHandle},
|
||||
{"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.getMemInfo},
|
||||
{"nvmlDeviceGetCount_v2", (void *)&resp->ch.getCount},
|
||||
};
|
||||
|
||||
for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {
|
||||
@@ -52,7 +55,7 @@ void cuda_init(cuda_init_resp_t *resp) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++) { // TODO - fix this to use a null terminated list
|
||||
for (i = 0; i < CUDA_LOOKUP_SIZE; i++) { // TODO - fix this to use a null terminated list
|
||||
*l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
|
||||
if (!l[i].p) {
|
||||
UNLOAD_LIBRARY(resp->ch.handle);
|
||||
@@ -89,22 +92,34 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO - handle multiple GPUs
|
||||
ret = (*h.getHandle)(0, &device);
|
||||
unsigned int devices;
|
||||
ret = (*h.getCount)(&devices);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
snprintf(buf, buflen, "unable to get device handle: %d", ret);
|
||||
snprintf(buf, buflen, "unable to get device count: %d", ret);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = (*h.getMemInfo)(device, &memInfo);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
snprintf(buf, buflen, "device memory info lookup failure: %d", ret);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
resp->total = 0;
|
||||
resp->free = 0;
|
||||
|
||||
for (i = 0; i < devices; i++) {
|
||||
ret = (*h.getHandle)(i, &device);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
snprintf(buf, buflen, "unable to get device handle %d: %d", i, ret);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = (*h.getMemInfo)(device, &memInfo);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
snprintf(buf, buflen, "device memory info lookup failure %d: %d", i, ret);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
|
||||
resp->total += memInfo.total;
|
||||
resp->free += memInfo.free;
|
||||
}
|
||||
resp->total = memInfo.total;
|
||||
resp->free = memInfo.free;
|
||||
return;
|
||||
}
|
||||
#endif // __APPLE__
|
||||
@@ -21,6 +21,7 @@ typedef struct cuda_handle {
|
||||
nvmlReturn_t (*shutdownFn)(void);
|
||||
nvmlReturn_t (*getHandle)(unsigned int, nvmlDevice_t *);
|
||||
nvmlReturn_t (*getMemInfo)(nvmlDevice_t, nvmlMemory_t *);
|
||||
nvmlReturn_t (*getCount)(unsigned int *);
|
||||
} cuda_handle_t;
|
||||
|
||||
typedef struct cuda_init_resp {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
set(TARGET ext_server)
|
||||
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
||||
add_library(${TARGET} STATIC ../../../ext_server.cpp)
|
||||
add_library(${TARGET} STATIC ../../../ext_server/ext_server.cpp)
|
||||
target_include_directories(${TARGET} PRIVATE ../../common)
|
||||
target_include_directories(${TARGET} PRIVATE ../..)
|
||||
target_include_directories(${TARGET} PRIVATE ../../..)
|
||||
4
llm/ext_server/README.md
Normal file
4
llm/ext_server/README.md
Normal file
@@ -0,0 +1,4 @@
|
||||
# Extern C Server
|
||||
|
||||
This directory contains a thin facade we layer on top of the Llama.cpp server
|
||||
to expose `extern C` interfaces to access the functionality through direct API calls in-process
|
||||
@@ -1,7 +1,7 @@
|
||||
package llm
|
||||
|
||||
/*
|
||||
#cgo CFLAGS: -I${SRCDIR}/llama.cpp -I${SRCDIR}/llama.cpp/gguf -I${SRCDIR}/llama.cpp/gguf/common -I${SRCDIR}/llama.cpp/gguf/examples/server
|
||||
#cgo CFLAGS: -I${SRCDIR}/ext_server -I${SRCDIR}/llama.cpp -I${SRCDIR}/llama.cpp/common -I${SRCDIR}/llama.cpp/examples/server
|
||||
#cgo CFLAGS: -DNDEBUG -DLLAMA_SERVER_LIBRARY=1 -D_XOPEN_SOURCE=600 -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
|
||||
#cgo CFLAGS: -Wmissing-noreturn -Wall -Wextra -Wcast-qual -Wno-unused-function -Wno-array-bounds
|
||||
#cgo CPPFLAGS: -Ofast -Wall -Wextra -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-unused-but-set-variable
|
||||
@@ -10,17 +10,17 @@ package llm
|
||||
#cgo darwin CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
|
||||
#cgo darwin LDFLAGS: -lc++ -framework Accelerate
|
||||
#cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
||||
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libcommon.a
|
||||
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libext_server.a
|
||||
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libllama.a
|
||||
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libggml_static.a
|
||||
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libcommon.a
|
||||
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libext_server.a
|
||||
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libllama.a
|
||||
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libggml_static.a
|
||||
#cgo linux CFLAGS: -D_GNU_SOURCE
|
||||
#cgo linux windows CFLAGS: -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_USE_CUBLAS
|
||||
#cgo linux LDFLAGS: -L/usr/local/cuda/targets/x86_64-linux/lib -L/usr/local/cuda/lib64 -L/usr/local/cuda/targets/x86_64-linux/lib/stubs
|
||||
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libext_server.a
|
||||
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libcommon.a
|
||||
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libllama.a
|
||||
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libggml_static.a
|
||||
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libext_server.a
|
||||
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libcommon.a
|
||||
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libllama.a
|
||||
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libggml_static.a
|
||||
#cgo linux LDFLAGS: -lrt -ldl -lstdc++ -lm
|
||||
#cgo linux windows LDFLAGS: -lpthread
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
# common logic across linux and darwin
|
||||
|
||||
init_vars() {
|
||||
LLAMACPP_DIR=gguf
|
||||
PATCHES="0001-Expose-callable-API-for-server.patch"
|
||||
LLAMACPP_DIR=../llama.cpp
|
||||
CMAKE_DEFS=""
|
||||
CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
|
||||
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
|
||||
@@ -18,19 +17,24 @@ git_module_setup() {
|
||||
echo "Skipping submodule initialization"
|
||||
return
|
||||
fi
|
||||
# Make sure the tree is clean after the directory moves
|
||||
if [ -d "${LLAMACPP_DIR}/gguf" ]; then
|
||||
echo "Cleaning up old submodule"
|
||||
rm -rf ${LLAMACPP_DIR}
|
||||
fi
|
||||
git submodule init
|
||||
git submodule update --force gguf
|
||||
git submodule update --force ${LLAMACPP_DIR}
|
||||
|
||||
}
|
||||
|
||||
apply_patches() {
|
||||
# Wire up our CMakefile
|
||||
if ! grep ollama gguf/examples/server/CMakeLists.txt; then
|
||||
echo 'include (../../../CMakeLists.txt) # ollama' >>gguf/examples/server/CMakeLists.txt
|
||||
if ! grep ollama ${LLAMACPP_DIR}/examples/server/CMakeLists.txt; then
|
||||
echo 'include (../../../ext_server/CMakeLists.txt) # ollama' >>${LLAMACPP_DIR}/examples/server/CMakeLists.txt
|
||||
fi
|
||||
# Avoid duplicate main symbols when we link into the cgo binary
|
||||
sed -e 's/int main(/int __main(/g' <./gguf/examples/server/server.cpp >./gguf/examples/server/server.cpp.tmp &&
|
||||
mv ./gguf/examples/server/server.cpp.tmp ./gguf/examples/server/server.cpp
|
||||
sed -e 's/int main(/int __main(/g' <${LLAMACPP_DIR}/examples/server/server.cpp >${LLAMACPP_DIR}/examples/server/server.cpp.tmp &&
|
||||
mv ${LLAMACPP_DIR}/examples/server/server.cpp.tmp ${LLAMACPP_DIR}/examples/server/server.cpp
|
||||
}
|
||||
|
||||
build() {
|
||||
@@ -49,5 +53,5 @@ install() {
|
||||
|
||||
# Keep the local tree clean after we're done with the build
|
||||
cleanup() {
|
||||
(cd gguf/examples/server/ && git checkout CMakeLists.txt server.cpp)
|
||||
(cd ${LLAMACPP_DIR}/examples/server/ && git checkout CMakeLists.txt server.cpp)
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
# This script is intended to run inside the go generate
|
||||
# working directory must be ../llm/llama.cpp
|
||||
# working directory must be ./llm/generate/
|
||||
|
||||
# TODO - add hardening to detect missing tools (cmake, etc.)
|
||||
|
||||
@@ -10,7 +10,7 @@ echo "Starting darwin generate script"
|
||||
source $(dirname $0)/gen_common.sh
|
||||
init_vars
|
||||
CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
|
||||
BUILD_DIR="gguf/build/darwin/metal"
|
||||
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal"
|
||||
case "${GOARCH}" in
|
||||
"amd64")
|
||||
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
# This script is intended to run inside the go generate
|
||||
# working directory must be llm/llama.cpp
|
||||
# working directory must be llm/generate/
|
||||
|
||||
# First we build our default built-in library which will be linked into the CGO
|
||||
# binary as a normal dependency. This default build is CPU based.
|
||||
@@ -52,7 +52,7 @@ apply_patches
|
||||
# CPU first for the default library
|
||||
#
|
||||
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
|
||||
BUILD_DIR="gguf/build/linux/cpu"
|
||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
|
||||
|
||||
build
|
||||
install
|
||||
@@ -64,7 +64,7 @@ if [ -d /usr/local/cuda/lib64/ ]; then
|
||||
echo "CUDA libraries detected - building dynamic CUDA library"
|
||||
init_vars
|
||||
CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
|
||||
BUILD_DIR="gguf/build/linux/cuda"
|
||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda"
|
||||
CUDA_LIB_DIR=/usr/local/cuda/lib64
|
||||
build
|
||||
install
|
||||
@@ -98,7 +98,7 @@ if [ -d "${ROCM_PATH}" ]; then
|
||||
echo "ROCm libraries detected - building dynamic ROCm library"
|
||||
init_vars
|
||||
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
|
||||
BUILD_DIR="gguf/build/linux/rocm"
|
||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm"
|
||||
build
|
||||
install
|
||||
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
|
||||
@@ -3,7 +3,7 @@
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
function init_vars {
|
||||
$script:patches = @("0001-Expose-callable-API-for-server.patch")
|
||||
$script:llamacppDir = "../llama.cpp"
|
||||
$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-A","x64")
|
||||
$script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static")
|
||||
if ($env:CGO_CFLAGS -contains "-g") {
|
||||
@@ -19,25 +19,25 @@ function git_module_setup {
|
||||
# TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
|
||||
& git submodule init
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& git submodule update --force gguf
|
||||
& git submodule update --force "${script:llamacppDir}"
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
}
|
||||
|
||||
function apply_patches {
|
||||
# Wire up our CMakefile
|
||||
if (!(Select-String -Path "gguf/examples/server/CMakeLists.txt" -Pattern 'ollama')) {
|
||||
Add-Content -Path "gguf/examples/server/CMakeLists.txt" -Value 'include (../../../CMakeLists.txt) # ollama'
|
||||
if (!(Select-String -Path "${script:llamacppDir}/examples/server/CMakeLists.txt" -Pattern 'ollama')) {
|
||||
Add-Content -Path "${script:llamacppDir}/examples/server/CMakeLists.txt" -Value 'include (../../../ext_server/CMakeLists.txt) # ollama'
|
||||
}
|
||||
# Avoid duplicate main symbols when we link into the cgo binary
|
||||
$content = Get-Content -Path "./gguf/examples/server/server.cpp"
|
||||
$content = Get-Content -Path "${script:llamacppDir}/examples/server/server.cpp"
|
||||
$content = $content -replace 'int main\(', 'int __main('
|
||||
Set-Content -Path "./gguf/examples/server/server.cpp" -Value $content
|
||||
Set-Content -Path "${script:llamacppDir}/examples/server/server.cpp" -Value $content
|
||||
}
|
||||
|
||||
function build {
|
||||
write-host "generating config with: cmake -S gguf -B $script:buildDir $script:cmakeDefs"
|
||||
write-host "generating config with: cmake -S ${script:llamacppDir} -B $script:buildDir $script:cmakeDefs"
|
||||
& cmake --version
|
||||
& cmake -S gguf -B $script:buildDir $script:cmakeDefs
|
||||
& cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
write-host "building with: cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })"
|
||||
& cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })
|
||||
@@ -55,7 +55,7 @@ function install {
|
||||
}
|
||||
|
||||
function cleanup {
|
||||
Set-Location "gguf/examples/server"
|
||||
Set-Location "${script:llamacppDir}/examples/server"
|
||||
git checkout CMakeLists.txt server.cpp
|
||||
}
|
||||
|
||||
@@ -64,20 +64,20 @@ git_module_setup
|
||||
apply_patches
|
||||
|
||||
# first build CPU based
|
||||
$script:buildDir="gguf/build/windows/cpu"
|
||||
$script:buildDir="${script:llamacppDir}/build/windows/cpu"
|
||||
|
||||
build
|
||||
install
|
||||
|
||||
# Then build cuda as a dynamically loaded library
|
||||
init_vars
|
||||
$script:buildDir="gguf/build/windows/cuda"
|
||||
$script:buildDir="${script:llamacppDir}/build/windows/cuda"
|
||||
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON")
|
||||
build
|
||||
install
|
||||
|
||||
# TODO - actually implement ROCm support on windows
|
||||
$script:buildDir="gguf/build/windows/rocm"
|
||||
$script:buildDir="${script:llamacppDir}/build/windows/rocm"
|
||||
|
||||
rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
|
||||
md "${script:buildDir}/lib" -ea 0 > $null
|
||||
@@ -1,3 +1,3 @@
|
||||
package llm
|
||||
package generate
|
||||
|
||||
//go:generate sh ./gen_darwin.sh
|
||||
@@ -1,3 +1,3 @@
|
||||
package llm
|
||||
package generate
|
||||
|
||||
//go:generate bash ./gen_linux.sh
|
||||
@@ -1,3 +1,3 @@
|
||||
package llm
|
||||
package generate
|
||||
|
||||
//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1
|
||||
@@ -13,7 +13,7 @@ import (
|
||||
"github.com/jmorganca/ollama/api"
|
||||
)
|
||||
|
||||
//go:embed llama.cpp/gguf/ggml-metal.metal
|
||||
//go:embed llama.cpp/ggml-metal.metal
|
||||
var libEmbed embed.FS
|
||||
|
||||
func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
|
||||
@@ -22,7 +22,7 @@ func newDynamicShimExtServer(library, model string, adapters, projectors []strin
|
||||
}
|
||||
|
||||
func nativeInit(workdir string) error {
|
||||
err := extractPayloadFiles(workdir, "llama.cpp/gguf/ggml-metal.metal")
|
||||
err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
|
||||
if err != nil {
|
||||
if err == payloadMissing {
|
||||
// TODO perhaps consider this a hard failure on arm macs?
|
||||
|
||||
@@ -34,6 +34,8 @@ type shimExtServer struct {
|
||||
var shimMutex sync.Mutex
|
||||
var llm *shimExtServer
|
||||
|
||||
const pathComponentCount = 6
|
||||
|
||||
func (llm *shimExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) {
|
||||
C.dynamic_shim_llama_server_init(llm.s, sparams, err)
|
||||
}
|
||||
@@ -112,7 +114,7 @@ func (llm *shimExtServer) Close() {
|
||||
}
|
||||
|
||||
func nativeInit(workdir string) error {
|
||||
libs, err := extractDynamicLibs(workdir, "llama.cpp/gguf/build/*/*/lib/*")
|
||||
libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*")
|
||||
if err != nil {
|
||||
if err == payloadMissing {
|
||||
log.Printf("%s", payloadMissing)
|
||||
@@ -151,13 +153,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
|
||||
|
||||
for _, file := range files {
|
||||
pathComps := strings.Split(file, "/")
|
||||
if len(pathComps) != 7 {
|
||||
if len(pathComps) != pathComponentCount {
|
||||
log.Printf("unexpected payload components: %v", pathComps)
|
||||
continue
|
||||
}
|
||||
// llama.cpp/gguf/build/$OS/$VARIANT/lib/$LIBRARY
|
||||
// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY
|
||||
// Include the variant in the path to avoid conflicts between multiple server libs
|
||||
targetDir := filepath.Join(workDir, pathComps[4])
|
||||
targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
|
||||
srcFile, err := libEmbed.Open(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read payload %s: %v", file, err)
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
//go:embed llama.cpp/gguf/build/*/*/lib/*.so
|
||||
//go:embed llama.cpp/build/*/*/lib/*.so
|
||||
var libEmbed embed.FS
|
||||
|
||||
func updatePath(dir string) {
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
//go:embed llama.cpp/gguf/build/windows/*/lib/*.dll
|
||||
//go:embed llama.cpp/build/windows/*/lib/*.dll
|
||||
var libEmbed embed.FS
|
||||
|
||||
func updatePath(dir string) {
|
||||
|
||||
63
parser/parser_test.go
Normal file
63
parser/parser_test.go
Normal file
@@ -0,0 +1,63 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func Test_Parser(t *testing.T) {
|
||||
|
||||
input := `
|
||||
FROM model1
|
||||
ADAPTER adapter1
|
||||
LICENSE MIT
|
||||
PARAMETER param1 value1
|
||||
PARAMETER param2 value2
|
||||
TEMPLATE template1
|
||||
`
|
||||
|
||||
reader := strings.NewReader(input)
|
||||
|
||||
commands, err := Parse(reader)
|
||||
assert.Nil(t, err)
|
||||
|
||||
expectedCommands := []Command{
|
||||
{Name: "model", Args: "model1"},
|
||||
{Name: "adapter", Args: "adapter1"},
|
||||
{Name: "license", Args: "MIT"},
|
||||
{Name: "param1", Args: "value1"},
|
||||
{Name: "param2", Args: "value2"},
|
||||
{Name: "template", Args: "template1"},
|
||||
}
|
||||
|
||||
assert.Equal(t, expectedCommands, commands)
|
||||
}
|
||||
|
||||
func Test_Parser_NoFromLine(t *testing.T) {
|
||||
|
||||
input := `
|
||||
PARAMETER param1 value1
|
||||
PARAMETER param2 value2
|
||||
`
|
||||
|
||||
reader := strings.NewReader(input)
|
||||
|
||||
_, err := Parse(reader)
|
||||
assert.ErrorContains(t, err, "no FROM line")
|
||||
}
|
||||
|
||||
func Test_Parser_MissingValue(t *testing.T) {
|
||||
|
||||
input := `
|
||||
FROM foo
|
||||
PARAMETER param1
|
||||
`
|
||||
|
||||
reader := strings.NewReader(input)
|
||||
|
||||
_, err := Parse(reader)
|
||||
assert.ErrorContains(t, err, "missing value for [param1]")
|
||||
|
||||
}
|
||||
@@ -8,7 +8,7 @@ export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version
|
||||
mkdir -p dist
|
||||
|
||||
for TARGETARCH in amd64 arm64; do
|
||||
docker build --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS --build-arg=CGO_CFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
|
||||
docker build --platform=linux/$TARGETARCH --build-arg=GOFLAGS --build-arg=CGO_CFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
|
||||
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
|
||||
docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
|
||||
docker rm builder-$TARGETARCH
|
||||
|
||||
@@ -610,12 +610,18 @@ func ShowModelHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if req.Name == "" {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
|
||||
switch {
|
||||
case req.Model == "" && req.Name == "":
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
|
||||
return
|
||||
case req.Model != "" && req.Name != "":
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "both model and name are set"})
|
||||
return
|
||||
case req.Model == "" && req.Name != "":
|
||||
req.Model = req.Name
|
||||
}
|
||||
|
||||
resp, err := GetModelInfo(req.Name)
|
||||
resp, err := GetModelInfo(req)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
|
||||
@@ -628,8 +634,8 @@ func ShowModelHandler(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
func GetModelInfo(name string) (*api.ShowResponse, error) {
|
||||
model, err := GetModel(name)
|
||||
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
||||
model, err := GetModel(req.Model)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -642,6 +648,14 @@ func GetModelInfo(name string) (*api.ShowResponse, error) {
|
||||
QuantizationLevel: model.Config.FileType,
|
||||
}
|
||||
|
||||
if req.System != "" {
|
||||
model.System = req.System
|
||||
}
|
||||
|
||||
if req.Template != "" {
|
||||
model.Template = req.Template
|
||||
}
|
||||
|
||||
resp := &api.ShowResponse{
|
||||
License: strings.Join(model.License, "\n"),
|
||||
System: model.System,
|
||||
@@ -649,13 +663,6 @@ func GetModelInfo(name string) (*api.ShowResponse, error) {
|
||||
Details: modelDetails,
|
||||
}
|
||||
|
||||
mf, err := ShowModelfile(model)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp.Modelfile = mf
|
||||
|
||||
var params []string
|
||||
cs := 30
|
||||
for k, v := range model.Options {
|
||||
@@ -685,6 +692,19 @@ func GetModelInfo(name string) (*api.ShowResponse, error) {
|
||||
}
|
||||
resp.Parameters = strings.Join(params, "\n")
|
||||
|
||||
for k, v := range req.Options {
|
||||
if _, ok := req.Options[k]; ok {
|
||||
model.Options[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
mf, err := ShowModelfile(model)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp.Modelfile = mf
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user