Compare commits

..

33 Commits
v0.1 ... v0.5

Author SHA1 Message Date
Ettore Di Giacinto
b7c0a108f5 Update README.md 2023-04-05 22:28:03 +02:00
Ettore Di Giacinto
f694a89c28 Update README.md 2023-04-05 22:14:00 +02:00
Ettore Di Giacinto
be682e6c2f Update README.md
Add short-term roadmap and mention webui
2023-04-05 22:04:35 +02:00
mudler
bf85a31f9e Don't set a default model path 2023-04-05 22:00:15 +02:00
Ettore Di Giacinto
d69048e0b0 Update README.md 2023-04-05 00:41:02 +02:00
mudler
827f189163 Update README 2023-03-30 18:46:11 +02:00
mudler
a23deb5ec7 Drop duplicate target 2023-03-29 19:44:41 +02:00
mudler
999676b106 Add gpt4all instructions 2023-03-29 18:58:54 +02:00
mudler
c61b023bc8 Drop fat images, will document how to consume models 2023-03-29 18:55:24 +02:00
mudler
650a22aef1 Add compatibility to gpt4all models 2023-03-29 18:53:24 +02:00
mudler
17b1724f7c Update llama-go 2023-03-27 01:18:14 +02:00
mudler
e860e62036 Add mutex, build only lite images 2023-03-27 01:01:38 +02:00
Ettore Di Giacinto
1f45ff8cd6 Update README.md 2023-03-26 23:37:26 +02:00
mudler
abee34f60a Cleanup leftover 2023-03-25 01:10:50 +01:00
mudler
dbc70dc13c Add a simple web-page as index of the API for helping with inference testing 2023-03-25 01:09:51 +01:00
mudler
55142065eb Update README with building instructions 2023-03-24 01:11:13 +01:00
mudler
d83d2293b5 Update version in kubernetes deployment 2023-03-23 23:22:43 +01:00
mudler
467ce5a7aa Update models download instructions, update images 2023-03-23 22:06:41 +01:00
mudler
4c9c5ce4ce Update README on instruction on how to prompt with the API 2023-03-23 19:25:28 +01:00
mudler
6394d85ca2 Lower conversion parallelism 2023-03-23 19:22:23 +01:00
mudler
2b6a5aef5f Lower earthly parallelism 2023-03-23 19:17:15 +01:00
mudler
d191ecb9fe Disable release pipeline 2023-03-23 19:14:39 +01:00
mudler
e14e1b0a77 Update README 2023-03-23 18:57:25 +01:00
mudler
bffaf2aa42 Build images without model 2023-03-23 18:50:43 +01:00
mudler
d98d1fe55e Use models from model repository 2023-03-23 18:44:24 +01:00
mudler
0785cb6b0b Update README with 13B and 30B model instructions 2023-03-22 00:18:48 +01:00
mudler
f88d5ad829 Update MODEL_URL 2023-03-21 22:03:20 +01:00
Ettore Di Giacinto
c7119a2882 Use tagged image in kubernetes deployment 2023-03-21 21:33:11 +01:00
mudler
8324402b49 Add interactive.go 2023-03-21 19:21:58 +01:00
mudler
9ba30c9c44 Update llama-go, allow to set context-size and enable alpaca model by default 2023-03-21 19:20:23 +01:00
mudler
973042bb4c Update README to use tagged container images 2023-03-21 18:45:59 +01:00
mudler
3ed2888646 Update README 2023-03-20 23:26:29 +01:00
mudler
593ff6308c Add simple client 2023-03-20 23:25:39 +01:00
13 changed files with 726 additions and 81 deletions

View File

@@ -84,4 +84,6 @@ jobs:
- uses: earthly/actions/setup-earthly@v1
- name: Build
run: |
earthly config "global.conversion_parallelism" "1"
earthly config "global.buildkit_max_parallelism" "1"
earthly --push +image-all --IMAGE=${{ steps.prep.outputs.image }}

View File

@@ -11,13 +11,6 @@ go-deps:
SAVE ARTIFACT go.mod AS LOCAL go.mod
SAVE ARTIFACT go.sum AS LOCAL go.sum
alpaca-model:
FROM alpine
# This is the alpaca.cpp model https://github.com/antimatter15/alpaca.cpp
ARG MODEL_URL=https://ipfs.io/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC
RUN wget -O model.bin -c https://ipfs.io/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC
SAVE ARTIFACT model.bin AS LOCAL model.bin
build:
FROM +go-deps
WORKDIR /build
@@ -29,12 +22,10 @@ build:
image:
FROM +go-deps
ARG IMAGE=alpaca-cli
COPY +alpaca-model/model.bin /model.bin
ARG IMAGE=alpaca-cli-nomodel
COPY +build/llama-cli /llama-cli
ENV MODEL_PATH=/model.bin
ENTRYPOINT [ "/llama-cli" ]
SAVE IMAGE --push $IMAGE
image-all:
BUILD --platform=linux/amd64 --platform=linux/arm64 +image
BUILD --platform=linux/amd64 --platform=linux/arm64 +image

161
README.md
View File

@@ -1,14 +1,16 @@
## :camel: llama-cli
llama-cli is a straightforward golang CLI interface for [llama.cpp](https://github.com/ggerganov/llama.cpp), providing a simple API and a command line interface that allows text generation using a GPT-based model like llama directly from the terminal.
llama-cli is a straightforward golang CLI interface for [llama.cpp](https://github.com/ggerganov/llama.cpp), providing a simple API and a command line interface that allows text generation using a GPT-based model like llama directly from the terminal. It is also compatible with [gpt4all](https://github.com/nomic-ai/gpt4all) and [alpaca](https://github.com/tatsu-lab/stanford_alpaca).
`llama-cli` uses https://github.com/go-skynet/llama, which is a fork of [llama.cpp](https://github.com/ggerganov/llama.cpp) providing golang binding.
## Container images
The `llama-cli` [container images](https://quay.io/repository/go-skynet/llama-cli?tab=tags&tag=latest) come preloaded with the [alpaca.cpp](https://github.com/antimatter15/alpaca.cpp) model, enabling you to start making predictions immediately! To begin, run:
To begin, run:
```
docker run -ti --rm quay.io/go-skynet/llama-cli:latest --instruction "What's an alpaca?" --topk 10000
docker run -ti --rm quay.io/go-skynet/llama-cli:v0.4 --instruction "What's an alpaca?" --topk 10000 --model ...
```
You will receive a response like the following:
@@ -19,7 +21,7 @@ An alpaca is a member of the South American Camelid family, which includes the l
## Basic usage
To use llama-cli, specify a pre-trained GPT-based model, an input text, and an instruction for text generation. llama-cli takes the following arguments:
To use llama-cli, specify a pre-trained GPT-based model, an input text, and an instruction for text generation. llama-cli takes the following arguments when running from the CLI:
```
llama-cli --model <model_path> --instruction <instruction> [--input <input>] [--template <template_path>] [--tokens <num_tokens>] [--threads <num_threads>] [--temperature <temperature>] [--topp <top_p>] [--topk <top_k>]
@@ -33,10 +35,12 @@ llama-cli --model <model_path> --instruction <instruction> [--input <input>] [--
| model | MODEL_PATH | | The path to the pre-trained GPT-based model. |
| tokens | TOKENS | 128 | The maximum number of tokens to generate. |
| threads | THREADS | NumCPU() | The number of threads to use for text generation. |
| temperature | TEMPERATURE | 0.95 | Sampling temperature for model output. |
| temperature | TEMPERATURE | 0.95 | Sampling temperature for model output. ( values between `0.1` and `1.0` ) |
| top_p | TOP_P | 0.85 | The cumulative probability for top-p sampling. |
| top_k | TOP_K | 20 | The number of top-k tokens to consider for text generation. |
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
| alpaca | ALPACA | true | Set to true for alpaca models. |
| gpt4all | GPT4ALL | false | Set to true for gpt4all models. |
Here's an example of using `llama-cli`:
@@ -48,20 +52,13 @@ This will generate text based on the given model and instruction.
## Advanced usage
`llama-cli` also provides an API for running text generation as a service. You can start the API server using the following command:
`llama-cli` also provides an API for running text generation as a service. The model will be pre-loaded and kept in memory.
Example of starting the API with `docker`:
```bash
docker run -p 8080:8080 -ti --rm quay.io/go-skynet/llama-cli:v0.4 api --context-size 700 --threads 4
```
llama-cli api --model <model_path> [--address <address>] [--threads <num_threads>]
```
The API takes takes the following arguments:
| Parameter | Environment Variable | Default Value | Description |
| ------------ | -------------------- | ------------- | -------------------------------------- |
| model | MODEL_PATH | | The path to the pre-trained GPT-based model. |
| threads | THREADS | CPU cores | The number of threads to use for text generation. |
| address | ADDRESS | :8080 | The address and port to listen on. |
And you'll see:
```
@@ -75,7 +72,25 @@ And you'll see:
└───────────────────────────────────────────────────┘
```
Once the server is running, you can make requests to it using HTTP. For example, to generate text based on an instruction, you can send a POST request to the `/predict` endpoint with the instruction as the request body:
You can control the API server options with command line arguments:
```
llama-cli api --model <model_path> [--address <address>] [--threads <num_threads>]
```
The API takes takes the following:
| Parameter | Environment Variable | Default Value | Description |
| ------------ | -------------------- | ------------- | -------------------------------------- |
| model | MODEL_PATH | | The path to the pre-trained GPT-based model. |
| threads | THREADS | CPU cores | The number of threads to use for text generation. |
| address | ADDRESS | :8080 | The address and port to listen on. |
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
| alpaca | ALPACA | true | Set to true for alpaca models. |
| gpt4all | GPT4ALL | false | Set to true for gpt4all models. |
Once the server is running, you can start making requests to it using HTTP. For example, to generate text based on an instruction, you can send a POST request to the `/predict` endpoint with the instruction as the request body:
```
curl --location --request POST 'http://localhost:8080/predict' --header 'Content-Type: application/json' --data-raw '{
@@ -87,16 +102,118 @@ curl --location --request POST 'http://localhost:8080/predict' --header 'Content
}'
```
Example of starting the API with `docker`:
There is also available a simple web interface (for instance, http://localhost:8080/) which can be used as a playground.
Note: The API doesn't inject a template for talking to the instance, while the CLI does. You have to use a prompt similar to what's described in the standford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release, for instance:
```
Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:
```
## Using other models
You can specify a model binary to be used for inference with `--model`.
13B and 30B alpaca models are known to work:
```
# Download the model image, extract the model
# Use the model with llama-cli
docker run -v $PWD:/models -p 8080:8080 -ti --rm quay.io/go-skynet/llama-cli:v0.4 api --model /models/model.bin
```
gpt4all (https://github.com/nomic-ai/gpt4all) works as well, however the original model needs to be converted (same applies for old alpaca models, too):
```bash
docker run -ti --rm quay.io/go-skynet/llama-cli:latest api
wget -O tokenizer.model https://huggingface.co/decapoda-research/llama-30b-hf/resolve/main/tokenizer.model
mkdir models
cp gpt4all.. models/
git clone https://gist.github.com/eiz/828bddec6162a023114ce19146cb2b82
pip install sentencepiece
python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
# There will be a new model with the ".tmp" extension, you have to use that one!
```
### Golang client API
The `llama-cli` codebase has also a small client in go that can be used alongside with the api:
```golang
package main
import (
"fmt"
client "github.com/go-skynet/llama-cli/client"
)
func main() {
cli := client.NewClient("http://ip:30007")
out, err := cli.Predict("What's an alpaca?")
if err != nil {
panic(err)
}
fmt.Println(out)
}
```
### Windows compatibility
It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/llama-cli/issues/2
### Kubernetes
You can run the API directly in Kubernetes:
```bash
kubectl apply -f https://raw.githubusercontent.com/go-skynet/llama-cli/master/kubernetes/deployment.yaml
```
```
### Build locally
Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.
In order to build the `llama-cli` container image locally you can use `docker`:
```
# build the image as "alpaca-image"
docker run --privileged -v /var/run/docker.sock:/var/run/docker.sock --rm -t -v "$(pwd)":/workspace -v earthly-tmp:/tmp/earthly:rw earthly/earthly:v0.7.2 +image --IMAGE=alpaca-image
# run the image
docker run alpaca-image --instruction "What's an alpaca?"
```
Or build the binary with:
```
# build the image as "alpaca-image"
docker run --privileged -v /var/run/docker.sock:/var/run/docker.sock --rm -t -v "$(pwd)":/workspace -v earthly-tmp:/tmp/earthly:rw earthly/earthly:v0.7.2 +build
# run the binary
./llama-cli --instruction "What's an alpaca?"
```
## Short-term roadmap
- Mimic OpenAI API (https://github.com/go-skynet/llama-cli/issues/10)
- Binary releases (https://github.com/go-skynet/llama-cli/issues/6)
- Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
- Multi-model support
- Full Deployment and compatibility with https://github.com/mckaywrigley/chatbot-ui
## License
MIT
## Acknowledgements
- [llama.cpp](https://github.com/ggerganov/llama.cpp)
- https://github.com/tatsu-lab/stanford_alpaca
- https://github.com/cornelk/llama-go for the initial ideas
- https://github.com/antimatter15/alpaca.cpp for the light model version (this is compatible and tested only with that checkpoint model!)

22
api.go
View File

@@ -1,20 +1,25 @@
package main
import (
"embed"
"net/http"
"strconv"
"sync"
llama "github.com/go-skynet/llama/go"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/filesystem"
)
func api(model, listenAddr string, threads int) error {
//go:embed index.html
var indexHTML embed.FS
func api(l *llama.LLama, listenAddr string, threads int) error {
app := fiber.New()
l, err := llama.New(model)
if err != nil {
return err
}
app.Use("/", filesystem.New(filesystem.Config{
Root: http.FS(indexHTML),
NotFoundFile: "index.html",
}))
/*
curl --location --request POST 'http://localhost:8080/predict' --header 'Content-Type: application/json' --data-raw '{
"text": "What is an alpaca?",
@@ -24,9 +29,12 @@ func api(model, listenAddr string, threads int) error {
"tokens": 100
}'
*/
var mutex = &sync.Mutex{}
// Endpoint to generate the prediction
app.Post("/predict", func(c *fiber.Ctx) error {
mutex.Lock()
defer mutex.Unlock()
// Get input data from the request body
input := new(struct {
Text string `json:"text"`

75
client/client.go Normal file
View File

@@ -0,0 +1,75 @@
package client
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
)
type Prediction struct {
Prediction string `json:"prediction"`
}
type Client struct {
baseURL string
client *http.Client
endpoint string
}
func NewClient(baseURL string) *Client {
return &Client{
baseURL: baseURL,
client: &http.Client{},
endpoint: "/predict",
}
}
type InputData struct {
Text string `json:"text"`
TopP float64 `json:"topP,omitempty"`
TopK int `json:"topK,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
Tokens int `json:"tokens,omitempty"`
}
func (c *Client) Predict(text string, opts ...InputOption) (string, error) {
input := NewInputData(opts...)
input.Text = text
// encode input data to JSON format
inputBytes, err := json.Marshal(input)
if err != nil {
return "", err
}
// create HTTP request
url := c.baseURL + c.endpoint
req, err := http.NewRequest("POST", url, bytes.NewBuffer(inputBytes))
if err != nil {
return "", err
}
// set request headers
req.Header.Set("Content-Type", "application/json")
// send request and get response
resp, err := c.client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("request failed with status %d", resp.StatusCode)
}
// decode response body to Prediction struct
var prediction Prediction
err = json.NewDecoder(resp.Body).Decode(&prediction)
if err != nil {
return "", err
}
return prediction.Prediction, nil
}

51
client/options.go Normal file
View File

@@ -0,0 +1,51 @@
package client
import "net/http"
type ClientOption func(c *Client)
func WithHTTPClient(httpClient *http.Client) ClientOption {
return func(c *Client) {
c.client = httpClient
}
}
func WithEndpoint(endpoint string) ClientOption {
return func(c *Client) {
c.endpoint = endpoint
}
}
type InputOption func(d *InputData)
func NewInputData(opts ...InputOption) *InputData {
data := &InputData{}
for _, opt := range opts {
opt(data)
}
return data
}
func WithTopP(topP float64) InputOption {
return func(d *InputData) {
d.TopP = topP
}
}
func WithTopK(topK int) InputOption {
return func(d *InputData) {
d.TopK = topK
}
}
func WithTemperature(temperature float64) InputOption {
return func(d *InputData) {
d.Temperature = temperature
}
}
func WithTokens(tokens int) InputOption {
return func(d *InputData) {
d.Tokens = tokens
}
}

19
go.mod
View File

@@ -3,19 +3,31 @@ module github.com/go-skynet/llama-cli
go 1.19
require (
github.com/go-skynet/llama v0.0.0-20230319223917-0076188dd548
github.com/charmbracelet/bubbles v0.15.0
github.com/charmbracelet/bubbletea v0.23.2
github.com/charmbracelet/lipgloss v0.7.1
github.com/go-skynet/llama v0.0.0-20230329165201-84efc8db3647
github.com/gofiber/fiber/v2 v2.42.0
github.com/urfave/cli/v2 v2.25.0
)
require (
github.com/andybalholm/brotli v1.0.4 // indirect
github.com/atotto/clipboard v0.1.4 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/containerd/console v1.0.3 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/klauspost/compress v1.15.9 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.17 // indirect
github.com/mattn/go-localereader v0.0.1 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b // indirect
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/reflow v0.3.0 // indirect
github.com/muesli/termenv v0.15.1 // indirect
github.com/philhofer/fwd v1.1.1 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
@@ -26,5 +38,8 @@ require (
github.com/valyala/fasthttp v1.44.0 // indirect
github.com/valyala/tcplisten v1.0.0 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab // indirect
golang.org/x/sync v0.1.0 // indirect
golang.org/x/sys v0.6.0 // indirect
golang.org/x/term v0.5.0 // indirect
golang.org/x/text v0.7.0 // indirect
)

60
go.sum
View File

@@ -1,28 +1,72 @@
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/aymanbagabas/go-osc52 v1.0.3/go.mod h1:zT8H+Rk4VSabYN90pWyugflM3ZhpTZNC7cASDfUCdT4=
github.com/aymanbagabas/go-osc52 v1.2.1/go.mod h1:zT8H+Rk4VSabYN90pWyugflM3ZhpTZNC7cASDfUCdT4=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/charmbracelet/bubbles v0.15.0 h1:c5vZ3woHV5W2b8YZI1q7v4ZNQaPetfHuoHzx+56Z6TI=
github.com/charmbracelet/bubbles v0.15.0/go.mod h1:Y7gSFbBzlMpUDR/XM9MhZI374Q+1p1kluf1uLl8iK74=
github.com/charmbracelet/bubbletea v0.23.1/go.mod h1:JAfGK/3/pPKHTnAS8JIE2u9f61BjWTQY57RbT25aMXU=
github.com/charmbracelet/bubbletea v0.23.2 h1:vuUJ9HJ7b/COy4I30e8xDVQ+VRDUEFykIjryPfgsdps=
github.com/charmbracelet/bubbletea v0.23.2/go.mod h1:FaP3WUivcTM0xOKNmhciz60M6I+weYLF76mr1JyI7sM=
github.com/charmbracelet/harmonica v0.2.0/go.mod h1:KSri/1RMQOZLbw7AHqgcBycp8pgJnQMYYT8QZRqZ1Ao=
github.com/charmbracelet/lipgloss v0.6.0/go.mod h1:tHh2wr34xcHjC2HCXIlGSG1jaDF0S0atAUvBMP6Ppuk=
github.com/charmbracelet/lipgloss v0.7.1 h1:17WMwi7N1b1rVWOjMT+rCh7sQkvDU75B2hbZpc5Kc1E=
github.com/charmbracelet/lipgloss v0.7.1/go.mod h1:yG0k3giv8Qj8edTCbbg6AlQ5e8KNWpFujkNawKNhE2c=
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/go-skynet/llama v0.0.0-20230319223917-0076188dd548 h1:wXcEESf+zNXidrSZoDKBoIJQDDMzUwVysLIbCkWGYvM=
github.com/go-skynet/llama v0.0.0-20230319223917-0076188dd548/go.mod h1:ZtYsAIud4cvP9VTTI9uhdgR1uCwaO/gGKnZZ95h9i7w=
github.com/go-skynet/llama v0.0.0-20230321172246-7be5326e18cc h1:NcmO8mA7iRZIX0Qy2SjcsSaV14+g87MiTey1neUJaFQ=
github.com/go-skynet/llama v0.0.0-20230321172246-7be5326e18cc/go.mod h1:ZtYsAIud4cvP9VTTI9uhdgR1uCwaO/gGKnZZ95h9i7w=
github.com/go-skynet/llama v0.0.0-20230325223742-a3563a2690ba h1:u6OhAqlWFHsTjfWKePdK2kP4/mTyXX5vsmKwrK5QX6o=
github.com/go-skynet/llama v0.0.0-20230325223742-a3563a2690ba/go.mod h1:ZtYsAIud4cvP9VTTI9uhdgR1uCwaO/gGKnZZ95h9i7w=
github.com/go-skynet/llama v0.0.0-20230329165201-84efc8db3647 h1:W6qHHD/Bv6wRXSzdv38gWMAXgw3fklHyEblfw88uEUU=
github.com/go-skynet/llama v0.0.0-20230329165201-84efc8db3647/go.mod h1:ZtYsAIud4cvP9VTTI9uhdgR1uCwaO/gGKnZZ95h9i7w=
github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b h1:1XF24mVaiu7u+CFywTdcDo2ie1pzzhwjt6RHqzpMU34=
github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b/go.mod h1:fQuZ0gauxyBcmsdE3ZT4NasjaRdxmbCS0jRHsrWu3Ho=
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/muesli/reflow v0.2.1-0.20210115123740-9e1d0d53df68/go.mod h1:Xk+z4oIWdQqJzsxyjgl3P22oYZnHdZ8FFTHAQQt5BMQ=
github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
github.com/muesli/termenv v0.11.1-0.20220204035834-5ac8409525e0/go.mod h1:Bd5NYQ7pd+SrtBSrSNoBBmXlcY8+Xj4BMJgh8qcZrvs=
github.com/muesli/termenv v0.13.0/go.mod h1:sP1+uffeLaEYpyOTb8pLCUctGcGLnoFjSn4YJK5e2bc=
github.com/muesli/termenv v0.14.0/go.mod h1:kG/pF1E7fh949Xhe156crRUrHNyK221IuGO7Ez60Uc8=
github.com/muesli/termenv v0.15.1 h1:UzuTb/+hhlBugQz28rpzey4ZuKcZ03MeKsoG7IJZIxs=
github.com/muesli/termenv v0.15.1/go.mod h1:HeAQPTzpfs016yGtA4g00CsdYnVLJvxsS4ANqrZs2sQ=
github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sahilm/fuzzy v0.1.0/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
@@ -52,21 +96,31 @@ golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qx
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220204135822-1c1b9b1eba6a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab h1:2QkjZIsXupsJbJIdSjjUOgWK3aEtzyuh2mPt3l/CkeU=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=

120
index.html Normal file
View File

@@ -0,0 +1,120 @@
<!DOCTYPE html>
<html>
<head>
<title>llama-cli</title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css" crossorigin="anonymous" referrerpolicy="no-referrer" />
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css">
</head>
<style>
@keyframes rotating {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
.waiting {
animation: rotating 1s linear infinite;
}
</style>
<body>
<div class="container mt-5" x-data="{ templates:[
{
name: 'Alpaca: Instruction without input',
text: `Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{{.Instruction}}
### Response:`,
},
{
name: 'Alpaca: Instruction with input',
text: `Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{{.Instruction}}
### Input:
{{.Input}}
### Response:`,
}
], selectedTemplate: '', selectedTemplateText: '' }">
<h1>llama-cli API</h1>
<div class="form-group">
<label for="inputText">Input Text:</label>
<textarea class="form-control" id="inputText" rows="6" placeholder="Your text input here..." x-text="selectedTemplateText"></textarea>
</div>
<div class="form-group">
<label for="templateSelect">Select Template:</label>
<select class="form-control" id="templateSelect" x-model="selectedTemplateText">
<option value="">None</option>
<template x-for="(template, index) in templates" :key="index">
<option :value="template.text" x-text="template.name"></option>
</template>
</select>
</div>
<div class="form-group">
<label for="topP">Top P:</label>
<input type="range" step="0.01" min="0" max="1" class="form-control" id="topP" value="0.20" name="topP" onchange="this.nextElementSibling.value = this.value" required>
<output>0.20</output>
</div>
<div class="form-group">
<label for="topK">Top K:</label>
<input type="number" class="form-control" id="topK" value="10000" name="topK" required>
</div>
<div class="form-group">
<label for="temperature">Temperature:</label>
<input type="range" step="0.01" min="0" max="1" value="0.9" class="form-control" id="temperature" name="temperature" onchange="this.nextElementSibling.value = this.value" required>
<output>0.9</output>
</div>
<div class="form-group">
<label for="tokens">Tokens:</label>
<input type="number" class="form-control" id="tokens" name="tokens" value="128" required>
</div>
<button class="btn btn-primary" x-on:click="submitRequest()">Submit <i class="fas fa-paper-plane"></i></button>
<hr>
<div class="form-group">
<label for="outputText">Output Text:</label>
<textarea class="form-control" id="outputText" rows="5" readonly></textarea>
</div>
</div>
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
<script>
function submitRequest() {
var button = document.querySelector("i.fa-paper-plane");
button.classList.add("waiting");
var text = document.getElementById("inputText").value;
var url = "/predict";
var data = {
"text": text,
"topP": document.getElementById("topP").value,
"topK": document.getElementById("topK").value,
"temperature": document.getElementById("temperature").value,
"tokens": document.getElementById("tokens").value
};
fetch(url, {
method: "POST",
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify(data)
})
.then(response => response.json())
.then(data => {
document.getElementById("outputText").value = data.prediction;
button.classList.remove("waiting");
})
.catch(error => { console.error(error); button.classList.remove("waiting"); });
}
</script>
</body>
</html>

142
interactive.go Normal file
View File

@@ -0,0 +1,142 @@
package main
// A simple program demonstrating the text area component from the Bubbles
// component library.
import (
"fmt"
"strings"
"github.com/charmbracelet/bubbles/textarea"
"github.com/charmbracelet/bubbles/viewport"
tea "github.com/charmbracelet/bubbletea"
"github.com/charmbracelet/lipgloss"
llama "github.com/go-skynet/llama/go"
)
func startInteractive(l *llama.LLama, opts ...llama.PredictOption) error {
p := tea.NewProgram(initialModel(l, opts...))
_, err := p.Run()
return err
}
type (
errMsg error
)
type model struct {
viewport viewport.Model
messages *[]string
textarea textarea.Model
senderStyle lipgloss.Style
err error
l *llama.LLama
opts []llama.PredictOption
predictC chan string
}
func initialModel(l *llama.LLama, opts ...llama.PredictOption) model {
ta := textarea.New()
ta.Placeholder = "Send a message..."
ta.Focus()
ta.Prompt = "┃ "
ta.CharLimit = 280
ta.SetWidth(200)
ta.SetHeight(3)
// Remove cursor line styling
ta.FocusedStyle.CursorLine = lipgloss.NewStyle()
ta.ShowLineNumbers = false
vp := viewport.New(200, 5)
vp.SetContent(`Welcome to llama-cli. Type a message and press Enter to send. Alpaca doesn't keep context of the whole chat (yet).`)
ta.KeyMap.InsertNewline.SetEnabled(false)
predictChannel := make(chan string)
messages := []string{}
m := model{
textarea: ta,
messages: &messages,
viewport: vp,
senderStyle: lipgloss.NewStyle().Foreground(lipgloss.Color("5")),
err: nil,
l: l,
opts: opts,
predictC: predictChannel,
}
go func() {
for p := range predictChannel {
str, _ := templateString(emptyInput, struct {
Instruction string
Input string
}{Instruction: p})
res, _ := l.Predict(
str,
opts...,
)
mm := *m.messages
*m.messages = mm[:len(mm)-1]
*m.messages = append(*m.messages, m.senderStyle.Render("llama: ")+res)
m.viewport.SetContent(strings.Join(*m.messages, "\n"))
ta.Reset()
m.viewport.GotoBottom()
}
}()
return m
}
func (m model) Init() tea.Cmd {
return textarea.Blink
}
func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
var (
tiCmd tea.Cmd
vpCmd tea.Cmd
)
m.textarea, tiCmd = m.textarea.Update(msg)
m.viewport, vpCmd = m.viewport.Update(msg)
switch msg := msg.(type) {
case tea.WindowSizeMsg:
// m.viewport.Width = msg.Width
// m.viewport.Height = msg.Height
case tea.KeyMsg:
switch msg.Type {
case tea.KeyCtrlC, tea.KeyEsc:
fmt.Println(m.textarea.Value())
return m, tea.Quit
case tea.KeyEnter:
*m.messages = append(*m.messages, m.senderStyle.Render("You: ")+m.textarea.Value(), m.senderStyle.Render("Loading response..."))
m.predictC <- m.textarea.Value()
m.viewport.SetContent(strings.Join(*m.messages, "\n"))
m.textarea.Reset()
m.viewport.GotoBottom()
}
// We handle errors just like any other message
case errMsg:
m.err = msg
return m, nil
}
return m, tea.Batch(tiCmd, vpCmd)
}
func (m model) View() string {
return fmt.Sprintf(
"%s\n\n%s",
m.viewport.View(),
m.textarea.View(),
) + "\n\n"
}

View File

@@ -25,7 +25,7 @@ spec:
- name: llama
args:
- api
image: quay.io/go-skynet/llama-cli:latest
image: quay.io/go-skynet/llama-cli:v0.3
---
apiVersion: v1
kind: Service
@@ -39,4 +39,4 @@ spec:
ports:
- protocol: TCP
port: 8080
targetPort: 8080
targetPort: 8080

138
main.go
View File

@@ -31,6 +31,17 @@ var nonEmptyInput string = `Below is an instruction that describes a task, paire
### Response:
`
func llamaFromOptions(ctx *cli.Context) (*llama.LLama, error) {
opts := []llama.ModelOption{llama.SetContext(ctx.Int("context-size"))}
if ctx.Bool("alpaca") {
opts = append(opts, llama.EnableAlpaca)
}
if ctx.Bool("gpt4all") {
opts = append(opts, llama.EnableGPT4All)
}
return llama.New(ctx.String("model"), opts...)
}
func templateString(t string, in interface{}) (string, error) {
// Parse the template
tmpl, err := template.New("prompt").Parse(t)
@@ -46,12 +57,59 @@ func templateString(t string, in interface{}) (string, error) {
return buf.String(), nil
}
var modelFlags = []cli.Flag{
&cli.StringFlag{
Name: "model",
EnvVars: []string{"MODEL_PATH"},
},
&cli.IntFlag{
Name: "tokens",
EnvVars: []string{"TOKENS"},
Value: 128,
},
&cli.IntFlag{
Name: "context-size",
EnvVars: []string{"CONTEXT_SIZE"},
Value: 512,
},
&cli.IntFlag{
Name: "threads",
EnvVars: []string{"THREADS"},
Value: runtime.NumCPU(),
},
&cli.Float64Flag{
Name: "temperature",
EnvVars: []string{"TEMPERATURE"},
Value: 0.95,
},
&cli.Float64Flag{
Name: "topp",
EnvVars: []string{"TOP_P"},
Value: 0.85,
},
&cli.IntFlag{
Name: "topk",
EnvVars: []string{"TOP_K"},
Value: 20,
},
&cli.BoolFlag{
Name: "alpaca",
EnvVars: []string{"ALPACA"},
Value: true,
},
&cli.BoolFlag{
Name: "gpt4all",
EnvVars: []string{"GPT4ALL"},
Value: false,
},
}
func main() {
app := &cli.App{
Name: "llama-cli",
Version: "0.1",
Usage: "llama-cli --model ... --instruction 'What is an alpaca?'",
Flags: []cli.Flag{
Flags: append(modelFlags,
&cli.StringFlag{
Name: "template",
EnvVars: []string{"TEMPLATE"},
@@ -63,37 +121,7 @@ func main() {
&cli.StringFlag{
Name: "input",
EnvVars: []string{"INPUT"},
},
&cli.StringFlag{
Name: "model",
EnvVars: []string{"MODEL_PATH"},
},
&cli.IntFlag{
Name: "tokens",
EnvVars: []string{"TOKENS"},
Value: 128,
},
&cli.IntFlag{
Name: "threads",
EnvVars: []string{"THREADS"},
Value: runtime.NumCPU(),
},
&cli.Float64Flag{
Name: "temperature",
EnvVars: []string{"TEMPERATURE"},
Value: 0.95,
},
&cli.Float64Flag{
Name: "topp",
EnvVars: []string{"TOP_P"},
Value: 0.85,
},
&cli.IntFlag{
Name: "topk",
EnvVars: []string{"TOP_K"},
Value: 20,
},
},
}),
Description: `Run llama.cpp inference`,
UsageText: `
llama-cli --model ~/ggml-alpaca-7b-q4.bin --instruction "What's an alpaca?"
@@ -107,6 +135,25 @@ echo "An Alpaca (Vicugna pacos) is a domesticated species of South American came
Copyright: "go-skynet authors",
Commands: []*cli.Command{
{
Flags: modelFlags,
Name: "interactive",
Action: func(ctx *cli.Context) error {
l, err := llamaFromOptions(ctx)
if err != nil {
fmt.Println("Loading the model failed:", err.Error())
os.Exit(1)
}
return startInteractive(l, llama.SetTemperature(ctx.Float64("temperature")),
llama.SetTopP(ctx.Float64("topp")),
llama.SetTopK(ctx.Int("topk")),
llama.SetTokens(ctx.Int("tokens")),
llama.SetThreads(ctx.Int("threads")))
},
},
{
Name: "api",
Flags: []cli.Flag{
&cli.IntFlag{
@@ -123,9 +170,30 @@ echo "An Alpaca (Vicugna pacos) is a domesticated species of South American came
EnvVars: []string{"ADDRESS"},
Value: ":8080",
},
&cli.BoolFlag{
Name: "alpaca",
EnvVars: []string{"ALPACA"},
Value: true,
},
&cli.BoolFlag{
Name: "gpt4all",
EnvVars: []string{"GPT4ALL"},
Value: false,
},
&cli.IntFlag{
Name: "context-size",
EnvVars: []string{"CONTEXT_SIZE"},
Value: 512,
},
},
Action: func(ctx *cli.Context) error {
return api(ctx.String("model"), ctx.String("address"), ctx.Int("threads"))
l, err := llamaFromOptions(ctx)
if err != nil {
fmt.Println("Loading the model failed:", err.Error())
os.Exit(1)
}
return api(l, ctx.String("address"), ctx.Int("threads"))
},
},
},
@@ -179,11 +247,13 @@ echo "An Alpaca (Vicugna pacos) is a domesticated species of South American came
fmt.Println("Templating the input failed:", err.Error())
os.Exit(1)
}
l, err := llama.New(ctx.String("model"))
l, err := llamaFromOptions(ctx)
if err != nil {
fmt.Println("Loading the model failed:", err.Error())
os.Exit(1)
}
res, err := l.Predict(
str,
llama.SetTemperature(ctx.Float64("temperature")),