Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 11:13:31 -05:00)

Compare commits

68 Commits
| Author | SHA1 | Date |
|---|---|---|
| | eb137c8a84 | |
| | 4e2061636e | |
| | e3ef171968 | |
| | 12d83a4184 | |
| | 045412e8dd | |
| | 9896a9a58b | |
| | b9011bda59 | |
| | 2b2f5fa36a | |
| | 43c557dc5c | |
| | 7abb2c9bd7 | |
| | 7a9ea4480a | |
| | 31bcc558de | |
| | 676e15f785 | |
| | 3e71c90949 | |
| | 550ae9c968 | |
| | 1c872ec326 | |
| | 05f35b182c | |
| | 79791438fe | |
| | bf20cc34f6 | |
| | 5cba71de70 | |
| | 4b7e83056d | |
| | ed954d66c3 | |
| | f816dfae65 | |
| | 142bcd66ca | |
| | 1c4fbaae20 | |
| | d517a54e28 | |
| | c905512bb0 | |
| | 1254951fab | |
| | 80f50e6ccd | |
| | 7fec26f5d3 | |
| | a9a875ee2b | |
| | db5ac715f3 | |
| | 0b330d90ad | |
| | 63601fabd1 | |
| | 1370b4482f | |
| | b062f3142b | |
| | c37175271f | |
| | e8eab66c30 | |
| | a73a497143 | |
| | 6aea515e1d | |
| | dfc2b7e02a | |
| | 040290971c | |
| | 553bad585e | |
| | f76b612506 | |
| | c4e94c88d7 | |
| | a9cd6b3ca3 | |
| | e786576b95 | |
| | d426571789 | |
| | a896a2b5ad | |
| | 8273cd5c04 | |
| | 16f1281d38 | |
| | 8042e9a2d6 | |
| | 624092cb99 | |
| | a422a883ac | |
| | 7858a97254 | |
| | 5556aa46dd | |
| | eb4257f946 | |
| | ae30bd346d | |
| | 93d8977ba2 | |
| | f43aeeb4a1 | |
| | c17dcc5e9d | |
| | 4a932483e1 | |
| | b710147b95 | |
| | ba70363330 | |
| | 9fb581739b | |
| | 48aca246e3 | |
| | 12eee097b7 | |
| | b33d015b8c | |
.devcontainer/Dockerfile — Normal file (3 lines)

@@ -0,0 +1,3 @@
ARG GO_VERSION=1.20
FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye
RUN apt-get update && apt-get install -y cmake

.devcontainer/devcontainer.json — Normal file (46 lines)

@@ -0,0 +1,46 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
{
  "name": "Existing Docker Compose (Extend)",

  // Update the 'dockerComposeFile' list if you have more compose files or use different names.
  // The .devcontainer/docker-compose.yml file contains any overrides you need/want to make.
  "dockerComposeFile": [
    "../docker-compose.yaml",
    "docker-compose.yml"
  ],

  // The 'service' property is the name of the service for the container that VS Code should
  // use. Update this value and .devcontainer/docker-compose.yml to the real service name.
  "service": "api",

  // The optional 'workspaceFolder' property is the path VS Code should open by default when
  // connected. This is typically a file mount in .devcontainer/docker-compose.yml
  "workspaceFolder": "/workspace",

  "features": {
    "ghcr.io/devcontainers/features/go:1": {},
    "ghcr.io/azutake/devcontainer-features/go-packages-install:0": {}
  },

  // Features to add to the dev container. More info: https://containers.dev/features.
  // "features": {},

  // Use 'forwardPorts' to make a list of ports inside the container available locally.
  // "forwardPorts": [],

  // Uncomment the next line if you want to start specific services in your Docker Compose config.
  // "runServices": [],

  // Uncomment the next line if you want to keep your containers running after VS Code shuts down.
  // "shutdownAction": "none",

  // Uncomment the next line to run commands after the container is created.
  "postCreateCommand": "make prepare"

  // Configure tool-specific properties.
  // "customizations": {},

  // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
  // "remoteUser": "devcontainer"
}

.devcontainer/docker-compose.yml — Normal file (26 lines)

@@ -0,0 +1,26 @@
version: '3.6'
services:
  # Update this to the name of the service you want to work with in your docker-compose.yml file
  api:
    # Uncomment if you want to override the service's Dockerfile to one in the .devcontainer
    # folder. Note that the path of the Dockerfile and context is relative to the *primary*
    # docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile"
    # array). The sample below assumes your primary file is in the root of your project.
    #
    build:
      context: .
      dockerfile: .devcontainer/Dockerfile

    volumes:
      # Update this to wherever you want VS Code to mount the folder of your project
      - .:/workspace:cached

    # Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust.
    # cap_add:
    #   - SYS_PTRACE
    # security_opt:
    #   - seccomp:unconfined

    # Overrides default command so things don't shut down after the process ends.
    command: /bin/sh -c "while sleep 1000; do :; done"

.dockerignore — Normal file (1 line)

@@ -0,0 +1 @@
models

.env — Normal file (5 lines)

@@ -0,0 +1,5 @@
# THREADS=14
# CONTEXT_SIZE=512
MODELS_PATH=/models
# DEBUG=true
# BUILD_TYPE=generic

.github/workflows/image.yml — vendored (71 lines)

@@ -2,6 +2,7 @@
name: 'build container images'

on:
  pull_request:
  push:
    branches:
      - master
@@ -12,68 +13,42 @@ jobs:
  docker:
    runs-on: ubuntu-latest
    steps:
      - name: Release space from worker
        run: |
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          df -h
          echo
          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
          sudo rm -rf /usr/local/lib/android
          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
          sudo rm -rf /usr/share/dotnet
          sudo apt-get remove -y '^mono-.*' || true
          sudo apt-get remove -y '^ghc-.*' || true
          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
          sudo apt-get remove -y 'php.*' || true
          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
          sudo apt-get remove -y '^google-.*' || true
          sudo apt-get remove -y azure-cli || true
          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
          sudo apt-get remove -y '^gfortran-.*' || true
          sudo apt-get autoremove -y
          sudo apt-get clean
          echo
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          sudo rm -rfv build || true
          df -h
      - name: Checkout
        uses: actions/checkout@v3

      - name: Prepare
        id: prep
        run: |
          DOCKER_IMAGE=quay.io/go-skynet/llama-cli
          VERSION=latest
          DOCKER_IMAGE=quay.io/go-skynet/local-ai
          VERSION=master
          SHORTREF=${GITHUB_SHA::8}

          # If this is git tag, use the tag name as a docker tag
          if [[ $GITHUB_REF == refs/tags/* ]]; then
            VERSION=${GITHUB_REF#refs/tags/}
          fi
          TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}"

          # If the VERSION looks like a version number, assume that
          # this is the most recent version of the image and also
          # tag it 'latest'.
          if [[ $VERSION =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
          if [[ $VERSION =~ ^v[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
            TAGS="$TAGS,${DOCKER_IMAGE}:latest"
          fi

          # Set output parameters.
          echo ::set-output name=tags::${TAGS}
          echo ::set-output name=docker_image::${DOCKER_IMAGE}
          echo ::set-output name=image::${DOCKER_IMAGE}:${VERSION}

      - name: Set up QEMU
        uses: docker/setup-qemu-action@master
        with:
          platforms: all

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@master

      - name: Login to DockerHub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
@@ -81,9 +56,23 @@ jobs:
        with:
          registry: quay.io
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_PASSWORD }}
      - uses: earthly/actions/setup-earthly@v1
      - name: Build
        run: |
          earthly config "global.conversion_parallelism" "1"
          earthly config "global.buildkit_max_parallelism" "1"
          earthly --push +image-all --IMAGE=${{ steps.prep.outputs.image }}
        if: github.event_name != 'pull_request'
        uses: docker/build-push-action@v4
        with:
          builder: ${{ steps.buildx.outputs.name }}
          context: .
          file: ./Dockerfile
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.prep.outputs.tags }}
      - name: Build PRs
        if: github.event_name == 'pull_request'
        uses: docker/build-push-action@v4
        with:
          builder: ${{ steps.buildx.outputs.name }}
          context: .
          file: ./Dockerfile
          platforms: linux/amd64
          push: false
          tags: ${{ steps.prep.outputs.tags }}

.github/workflows/test.yml — vendored, Normal file (44 lines)

@@ -0,0 +1,44 @@
---
name: 'tests'

on:
  pull_request:
  push:
    branches:
      - master
    tags:
      - '*'

jobs:
  ubuntu-latest:
    runs-on: ubuntu-latest

    steps:
      - name: Clone
        uses: actions/checkout@v3
        with:
          submodules: true
      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install build-essential
      - name: Test
        run: |
          make test

  macOS-latest:
    runs-on: macOS-latest

    steps:
      - name: Clone
        uses: actions/checkout@v3
        with:
          submodules: true

      - name: Dependencies
        run: |
          brew update
          brew install sdl2
      - name: Test
        run: |
          make test

.gitignore — vendored, Normal file (15 lines)

@@ -0,0 +1,15 @@
# go-llama build artifacts
go-llama
go-gpt4all-j
go-gpt2

# LocalAI build binary
LocalAI
local-ai
# prevent above rules from omitting the helm chart
!charts/*

# Ignore models
models/*.bin
models/ggml-*
test-models/

@@ -1,5 +1,5 @@
# Make sure to check the documentation at http://goreleaser.com
project_name: llama-cli
project_name: local-ai
builds:
  - ldflags:
      - -w -s

.vscode/launch.json — vendored, Normal file (20 lines)

@@ -0,0 +1,20 @@
{
  "version": "0.2.0",
  "configurations": [
    {
      "name": "Launch Go",
      "type": "go",
      "request": "launch",
      "mode": "debug",
      "program": "${workspaceFolder}/main.go",
      "args": [
        "api"
      ],
      "env": {
        "C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
        "LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
        "DEBUG": "true"
      }
    }
  ]
}

Earthfile (31 lines)

@@ -1,31 +0,0 @@
VERSION 0.7

go-deps:
    ARG GO_VERSION=1.20
    FROM golang:$GO_VERSION
    WORKDIR /build
    COPY go.mod ./
    COPY go.sum ./
    RUN go mod download
    RUN apt-get update
    SAVE ARTIFACT go.mod AS LOCAL go.mod
    SAVE ARTIFACT go.sum AS LOCAL go.sum

build:
    FROM +go-deps
    WORKDIR /build
    RUN git clone https://github.com/go-skynet/llama
    RUN cd llama && make libllama.a
    COPY . .
    RUN C_INCLUDE_PATH=/build/llama LIBRARY_PATH=/build/llama go build -o llama-cli ./
    SAVE ARTIFACT llama-cli AS LOCAL llama-cli

image:
    FROM +go-deps
    ARG IMAGE=alpaca-cli-nomodel
    COPY +build/llama-cli /llama-cli
    ENTRYPOINT [ "/llama-cli" ]
    SAVE IMAGE --push $IMAGE

image-all:
    BUILD --platform=linux/amd64 --platform=linux/arm64 +image

LICENSE (21 lines)

@@ -1,21 +0,0 @@
MIT License

Copyright (c) 2023 go-skynet authors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

README.md (219 lines)

@@ -1,219 +0,0 @@
## :camel: llama-cli

llama-cli is a straightforward Golang CLI for [llama.cpp](https://github.com/ggerganov/llama.cpp), providing a simple API and a command-line interface for generating text with GPT-based models such as LLaMA directly from the terminal. It is also compatible with [gpt4all](https://github.com/nomic-ai/gpt4all) and [alpaca](https://github.com/tatsu-lab/stanford_alpaca).

`llama-cli` uses https://github.com/go-skynet/llama, a fork of [llama.cpp](https://github.com/ggerganov/llama.cpp) that provides Golang bindings.

## Container images

To begin, run:

```
docker run -ti --rm quay.io/go-skynet/llama-cli:v0.4 --instruction "What's an alpaca?" --topk 10000 --model ...
```

You will receive a response like the following:

```
An alpaca is a member of the South American Camelid family, which includes the llama, guanaco and vicuña. It is a domesticated species that originates from the Andes mountain range in South America. Alpacas are used in the textile industry for their fleece, which is much softer than wool. Alpacas are also used for meat, milk, and fiber.
```

## Basic usage

To use llama-cli, specify a pre-trained GPT-based model, an input text, and an instruction for text generation. llama-cli takes the following arguments when running from the CLI:

```
llama-cli --model <model_path> --instruction <instruction> [--input <input>] [--template <template_path>] [--tokens <num_tokens>] [--threads <num_threads>] [--temperature <temperature>] [--topp <top_p>] [--topk <top_k>]
```

| Parameter    | Environment Variable | Default Value | Description |
| ------------ | -------------------- | ------------- | ----------- |
| template     | TEMPLATE             |               | A file containing a template for output formatting (optional). |
| instruction  | INSTRUCTION          |               | Input prompt text or instruction. "-" for STDIN. |
| input        | INPUT                | -             | Path to text or "-" for STDIN. |
| model        | MODEL_PATH           |               | The path to the pre-trained GPT-based model. |
| tokens       | TOKENS               | 128           | The maximum number of tokens to generate. |
| threads      | THREADS              | NumCPU()      | The number of threads to use for text generation. |
| temperature  | TEMPERATURE          | 0.95          | Sampling temperature for model output (values between `0.1` and `1.0`). |
| top_p        | TOP_P                | 0.85          | The cumulative probability for top-p sampling. |
| top_k        | TOP_K                | 20            | The number of top-k tokens to consider for text generation. |
| context-size | CONTEXT_SIZE         | 512           | Default token context size. |
| alpaca       | ALPACA               | true          | Set to true for alpaca models. |
| gpt4all      | GPT4ALL              | false         | Set to true for gpt4all models. |

Here's an example of using `llama-cli`:

```
llama-cli --model ~/ggml-alpaca-7b-q4.bin --instruction "What's an alpaca?"
```

This will generate text based on the given model and instruction.

## Advanced usage

`llama-cli` also provides an API for running text generation as a service. The model will be pre-loaded and kept in memory.

Example of starting the API with `docker`:

```bash
docker run -p 8080:8080 -ti --rm quay.io/go-skynet/llama-cli:v0.4 api --context-size 700 --threads 4
```

And you'll see:

```
┌───────────────────────────────────────────────────┐
│                   Fiber v2.42.0                   │
│               http://127.0.0.1:8080               │
│       (bound on host 0.0.0.0 and port 8080)       │
│                                                   │
│ Handlers ............. 1  Processes ........... 1 │
│ Prefork ....... Disabled  PID ................. 1 │
└───────────────────────────────────────────────────┘
```

You can control the API server options with command line arguments:

```
llama-cli api --model <model_path> [--address <address>] [--threads <num_threads>]
```

The API takes the following parameters:

| Parameter    | Environment Variable | Default Value | Description |
| ------------ | -------------------- | ------------- | ----------- |
| model        | MODEL_PATH           |               | The path to the pre-trained GPT-based model. |
| threads      | THREADS              | CPU cores     | The number of threads to use for text generation. |
| address      | ADDRESS              | :8080         | The address and port to listen on. |
| context-size | CONTEXT_SIZE         | 512           | Default token context size. |
| alpaca       | ALPACA               | true          | Set to true for alpaca models. |
| gpt4all      | GPT4ALL              | false         | Set to true for gpt4all models. |

Once the server is running, you can start making requests to it over HTTP. For example, to generate text based on an instruction, send a POST request to the `/predict` endpoint with the instruction as the request body:

```
curl --location --request POST 'http://localhost:8080/predict' --header 'Content-Type: application/json' --data-raw '{
    "text": "What is an alpaca?",
    "topP": 0.8,
    "topK": 50,
    "temperature": 0.7,
    "tokens": 100
}'
```

A simple web interface is also available (for instance, at http://localhost:8080/) and can be used as a playground.

Note: the API doesn't inject a template for talking to the instance, while the CLI does. You have to use a prompt similar to what's described in the stanford-alpaca docs (https://github.com/tatsu-lab/stanford_alpaca#data-release), for instance:

```
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Response:
```
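
Because the API does not apply the template for you, a Go caller has to wrap its instruction in the template before posting it. Below is a minimal sketch of that round trip, assuming the server runs at the local default address; the endpoint, JSON fields, and `{"prediction": ...}` response shape are the ones documented above.

```golang
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// alpacaPrompt mirrors the template shown above; the API does not
// inject it, so the caller prepends it to the instruction.
const alpacaPrompt = `Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
%s

### Response:`

func main() {
	prompt := fmt.Sprintf(alpacaPrompt, "What is an alpaca?")

	// Same fields as the curl example above.
	payload, err := json.Marshal(map[string]interface{}{
		"text":        prompt,
		"topP":        0.8,
		"topK":        50,
		"temperature": 0.7,
		"tokens":      100,
	})
	if err != nil {
		panic(err)
	}

	resp, err := http.Post("http://localhost:8080/predict", "application/json", bytes.NewBuffer(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The endpoint answers with {"prediction": "..."}.
	var out struct {
		Prediction string `json:"prediction"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out.Prediction)
}
```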

## Using other models

You can specify a model binary to be used for inference with `--model`.

13B and 30B alpaca models are known to work:

```
# Download the model image, extract the model
# Use the model with llama-cli
docker run -v $PWD:/models -p 8080:8080 -ti --rm quay.io/go-skynet/llama-cli:v0.4 api --model /models/model.bin
```

gpt4all (https://github.com/nomic-ai/gpt4all) works as well; however, the original model needs to be converted first (the same applies to old alpaca models):

```bash
wget -O tokenizer.model https://huggingface.co/decapoda-research/llama-30b-hf/resolve/main/tokenizer.model
mkdir models
cp gpt4all.. models/
git clone https://gist.github.com/eiz/828bddec6162a023114ce19146cb2b82
pip install sentencepiece
python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
# There will be a new model with the ".tmp" extension; you have to use that one!
```

### Golang client API

The `llama-cli` codebase also includes a small Go client that can be used alongside the API:

```golang
package main

import (
	"fmt"

	client "github.com/go-skynet/llama-cli/client"
)

func main() {

	cli := client.NewClient("http://ip:30007")

	out, err := cli.Predict("What's an alpaca?")
	if err != nil {
		panic(err)
	}

	fmt.Println(out)
}
```
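
`Predict` also accepts functional options mirroring the `/predict` fields; the option constructors used below (`WithTopK`, `WithTopP`, `WithTemperature`, `WithTokens`) are the ones defined in the `client` package included later in this changeset. A short sketch:

```golang
package main

import (
	"fmt"

	client "github.com/go-skynet/llama-cli/client"
)

func main() {
	cli := client.NewClient("http://ip:30007")

	// Each option sets one field of the JSON payload sent to /predict.
	out, err := cli.Predict("What's an alpaca?",
		client.WithTopK(50),
		client.WithTopP(0.8),
		client.WithTemperature(0.7),
		client.WithTokens(100),
	)
	if err != nil {
		panic(err)
	}
	fmt.Println(out)
}
```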

### Windows compatibility

It should work; however, you need to make sure you give the container enough resources. See https://github.com/go-skynet/llama-cli/issues/2.

### Kubernetes

You can run the API directly in Kubernetes:

```bash
kubectl apply -f https://raw.githubusercontent.com/go-skynet/llama-cli/master/kubernetes/deployment.yaml
```

### Build locally

Pre-built images should fit most modern hardware, but you can also build the images manually if needed.

In order to build the `llama-cli` container image locally, you can use `docker`:

```
# build the image as "alpaca-image"
docker run --privileged -v /var/run/docker.sock:/var/run/docker.sock --rm -t -v "$(pwd)":/workspace -v earthly-tmp:/tmp/earthly:rw earthly/earthly:v0.7.2 +image --IMAGE=alpaca-image
# run the image
docker run alpaca-image --instruction "What's an alpaca?"
```

Or build the binary with:

```
# build the binary
docker run --privileged -v /var/run/docker.sock:/var/run/docker.sock --rm -t -v "$(pwd)":/workspace -v earthly-tmp:/tmp/earthly:rw earthly/earthly:v0.7.2 +build
# run the binary
./llama-cli --instruction "What's an alpaca?"
```

## Short-term roadmap

- Mimic OpenAI API (https://github.com/go-skynet/llama-cli/issues/10)
- Binary releases (https://github.com/go-skynet/llama-cli/issues/6)
- Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
- Multi-model support
- Full deployment and compatibility with https://github.com/mckaywrigley/chatbot-ui

## License

MIT

## Acknowledgements

- [llama.cpp](https://github.com/ggerganov/llama.cpp)
- https://github.com/tatsu-lab/stanford_alpaca
- https://github.com/cornelk/llama-go for the initial ideas
- https://github.com/antimatter15/alpaca.cpp for the light model version (compatible and tested only with that checkpoint model!)

api.go (91 lines)

@@ -1,91 +0,0 @@
package main

import (
	"embed"
	"net/http"
	"strconv"
	"sync"

	llama "github.com/go-skynet/llama/go"
	"github.com/gofiber/fiber/v2"
	"github.com/gofiber/fiber/v2/middleware/filesystem"
)

//go:embed index.html
var indexHTML embed.FS

func api(l *llama.LLama, listenAddr string, threads int) error {
	app := fiber.New()
	app.Use("/", filesystem.New(filesystem.Config{
		Root:         http.FS(indexHTML),
		NotFoundFile: "index.html",
	}))
	/*
		curl --location --request POST 'http://localhost:8080/predict' --header 'Content-Type: application/json' --data-raw '{
			"text": "What is an alpaca?",
			"topP": 0.8,
			"topK": 50,
			"temperature": 0.7,
			"tokens": 100
		}'
	*/
	var mutex = &sync.Mutex{}

	// Endpoint to generate the prediction
	app.Post("/predict", func(c *fiber.Ctx) error {
		mutex.Lock()
		defer mutex.Unlock()
		// Get input data from the request body
		input := new(struct {
			Text string `json:"text"`
		})
		if err := c.BodyParser(input); err != nil {
			return err
		}

		// Set the parameters for the language model prediction
		topP, err := strconv.ParseFloat(c.Query("topP", "0.9"), 64) // Default value of topP is 0.9
		if err != nil {
			return err
		}

		topK, err := strconv.Atoi(c.Query("topK", "40")) // Default value of topK is 40
		if err != nil {
			return err
		}

		temperature, err := strconv.ParseFloat(c.Query("temperature", "0.5"), 64) // Default value of temperature is 0.5
		if err != nil {
			return err
		}

		tokens, err := strconv.Atoi(c.Query("tokens", "128")) // Default value of tokens is 128
		if err != nil {
			return err
		}

		// Generate the prediction using the language model
		prediction, err := l.Predict(
			input.Text,
			llama.SetTemperature(temperature),
			llama.SetTopP(topP),
			llama.SetTopK(topK),
			llama.SetTokens(tokens),
			llama.SetThreads(threads),
		)
		if err != nil {
			return err
		}

		// Return the prediction in the response body
		return c.JSON(struct {
			Prediction string `json:"prediction"`
		}{
			Prediction: prediction,
		})
	})

	// Start the server on the configured address and propagate any error
	// (the original hardcoded ":8080" and always returned nil, which
	// silently ignored the listenAddr parameter).
	return app.Listen(listenAddr)
}

@@ -1,75 +0,0 @@
package client

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

type Prediction struct {
	Prediction string `json:"prediction"`
}

type Client struct {
	baseURL  string
	client   *http.Client
	endpoint string
}

func NewClient(baseURL string) *Client {
	return &Client{
		baseURL:  baseURL,
		client:   &http.Client{},
		endpoint: "/predict",
	}
}

type InputData struct {
	Text        string  `json:"text"`
	TopP        float64 `json:"topP,omitempty"`
	TopK        int     `json:"topK,omitempty"`
	Temperature float64 `json:"temperature,omitempty"`
	Tokens      int     `json:"tokens,omitempty"`
}

func (c *Client) Predict(text string, opts ...InputOption) (string, error) {
	input := NewInputData(opts...)
	input.Text = text

	// encode input data to JSON format
	inputBytes, err := json.Marshal(input)
	if err != nil {
		return "", err
	}

	// create HTTP request
	url := c.baseURL + c.endpoint
	req, err := http.NewRequest("POST", url, bytes.NewBuffer(inputBytes))
	if err != nil {
		return "", err
	}

	// set request headers
	req.Header.Set("Content-Type", "application/json")

	// send request and get response
	resp, err := c.client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("request failed with status %d", resp.StatusCode)
	}

	// decode response body to Prediction struct
	var prediction Prediction
	err = json.NewDecoder(resp.Body).Decode(&prediction)
	if err != nil {
		return "", err
	}

	return prediction.Prediction, nil
}

@@ -1,51 +0,0 @@
package client

import "net/http"

type ClientOption func(c *Client)

func WithHTTPClient(httpClient *http.Client) ClientOption {
	return func(c *Client) {
		c.client = httpClient
	}
}

func WithEndpoint(endpoint string) ClientOption {
	return func(c *Client) {
		c.endpoint = endpoint
	}
}

type InputOption func(d *InputData)

func NewInputData(opts ...InputOption) *InputData {
	data := &InputData{}
	for _, opt := range opts {
		opt(data)
	}
	return data
}

func WithTopP(topP float64) InputOption {
	return func(d *InputData) {
		d.TopP = topP
	}
}

func WithTopK(topK int) InputOption {
	return func(d *InputData) {
		d.TopK = topK
	}
}

func WithTemperature(temperature float64) InputOption {
	return func(d *InputData) {
		d.Temperature = temperature
	}
}

func WithTokens(tokens int) InputOption {
	return func(d *InputData) {
		d.Tokens = tokens
	}
}

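These are ordinary Go functional options over the exported `InputData` fields, so callers can also bundle their own presets. A hypothetical sketch (`withDefaults` is not part of the package; the sampling values are the ones from the README examples):

```golang
package main

import (
	"fmt"

	client "github.com/go-skynet/llama-cli/client"
)

// withDefaults is a hypothetical caller-defined option bundling the
// sampling settings used in the README examples.
func withDefaults() client.InputOption {
	return func(d *client.InputData) {
		d.TopP = 0.8
		d.TopK = 50
		d.Temperature = 0.7
		d.Tokens = 100
	}
}

func main() {
	cli := client.NewClient("http://localhost:8080")
	out, err := cli.Predict("What is an alpaca?", withDefaults())
	if err != nil {
		panic(err)
	}
	fmt.Println(out)
}
```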
go.mod (45 lines)

@@ -1,45 +0,0 @@
module github.com/go-skynet/llama-cli

go 1.19

require (
	github.com/charmbracelet/bubbles v0.15.0
	github.com/charmbracelet/bubbletea v0.23.2
	github.com/charmbracelet/lipgloss v0.7.1
	github.com/go-skynet/llama v0.0.0-20230329165201-84efc8db3647
	github.com/gofiber/fiber/v2 v2.42.0
	github.com/urfave/cli/v2 v2.25.0
)

require (
	github.com/andybalholm/brotli v1.0.4 // indirect
	github.com/atotto/clipboard v0.1.4 // indirect
	github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
	github.com/containerd/console v1.0.3 // indirect
	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
	github.com/google/uuid v1.3.0 // indirect
	github.com/klauspost/compress v1.15.9 // indirect
	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
	github.com/mattn/go-colorable v0.1.13 // indirect
	github.com/mattn/go-isatty v0.0.17 // indirect
	github.com/mattn/go-localereader v0.0.1 // indirect
	github.com/mattn/go-runewidth v0.0.14 // indirect
	github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b // indirect
	github.com/muesli/cancelreader v0.2.2 // indirect
	github.com/muesli/reflow v0.3.0 // indirect
	github.com/muesli/termenv v0.15.1 // indirect
	github.com/philhofer/fwd v1.1.1 // indirect
	github.com/rivo/uniseg v0.2.0 // indirect
	github.com/russross/blackfriday/v2 v2.1.0 // indirect
	github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect
	github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d // indirect
	github.com/tinylib/msgp v1.1.6 // indirect
	github.com/valyala/bytebufferpool v1.0.0 // indirect
	github.com/valyala/fasthttp v1.44.0 // indirect
	github.com/valyala/tcplisten v1.0.0 // indirect
	github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
	golang.org/x/sync v0.1.0 // indirect
	golang.org/x/sys v0.6.0 // indirect
	golang.org/x/term v0.5.0 // indirect
	golang.org/x/text v0.7.0 // indirect
)

go.sum (129 lines)

@@ -1,129 +0,0 @@
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/aymanbagabas/go-osc52 v1.0.3/go.mod h1:zT8H+Rk4VSabYN90pWyugflM3ZhpTZNC7cASDfUCdT4=
github.com/aymanbagabas/go-osc52 v1.2.1/go.mod h1:zT8H+Rk4VSabYN90pWyugflM3ZhpTZNC7cASDfUCdT4=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/charmbracelet/bubbles v0.15.0 h1:c5vZ3woHV5W2b8YZI1q7v4ZNQaPetfHuoHzx+56Z6TI=
github.com/charmbracelet/bubbles v0.15.0/go.mod h1:Y7gSFbBzlMpUDR/XM9MhZI374Q+1p1kluf1uLl8iK74=
github.com/charmbracelet/bubbletea v0.23.1/go.mod h1:JAfGK/3/pPKHTnAS8JIE2u9f61BjWTQY57RbT25aMXU=
github.com/charmbracelet/bubbletea v0.23.2 h1:vuUJ9HJ7b/COy4I30e8xDVQ+VRDUEFykIjryPfgsdps=
github.com/charmbracelet/bubbletea v0.23.2/go.mod h1:FaP3WUivcTM0xOKNmhciz60M6I+weYLF76mr1JyI7sM=
github.com/charmbracelet/harmonica v0.2.0/go.mod h1:KSri/1RMQOZLbw7AHqgcBycp8pgJnQMYYT8QZRqZ1Ao=
github.com/charmbracelet/lipgloss v0.6.0/go.mod h1:tHh2wr34xcHjC2HCXIlGSG1jaDF0S0atAUvBMP6Ppuk=
github.com/charmbracelet/lipgloss v0.7.1 h1:17WMwi7N1b1rVWOjMT+rCh7sQkvDU75B2hbZpc5Kc1E=
github.com/charmbracelet/lipgloss v0.7.1/go.mod h1:yG0k3giv8Qj8edTCbbg6AlQ5e8KNWpFujkNawKNhE2c=
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/go-skynet/llama v0.0.0-20230321172246-7be5326e18cc h1:NcmO8mA7iRZIX0Qy2SjcsSaV14+g87MiTey1neUJaFQ=
github.com/go-skynet/llama v0.0.0-20230321172246-7be5326e18cc/go.mod h1:ZtYsAIud4cvP9VTTI9uhdgR1uCwaO/gGKnZZ95h9i7w=
github.com/go-skynet/llama v0.0.0-20230325223742-a3563a2690ba h1:u6OhAqlWFHsTjfWKePdK2kP4/mTyXX5vsmKwrK5QX6o=
github.com/go-skynet/llama v0.0.0-20230325223742-a3563a2690ba/go.mod h1:ZtYsAIud4cvP9VTTI9uhdgR1uCwaO/gGKnZZ95h9i7w=
github.com/go-skynet/llama v0.0.0-20230329165201-84efc8db3647 h1:W6qHHD/Bv6wRXSzdv38gWMAXgw3fklHyEblfw88uEUU=
github.com/go-skynet/llama v0.0.0-20230329165201-84efc8db3647/go.mod h1:ZtYsAIud4cvP9VTTI9uhdgR1uCwaO/gGKnZZ95h9i7w=
github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b h1:1XF24mVaiu7u+CFywTdcDo2ie1pzzhwjt6RHqzpMU34=
github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b/go.mod h1:fQuZ0gauxyBcmsdE3ZT4NasjaRdxmbCS0jRHsrWu3Ho=
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/muesli/reflow v0.2.1-0.20210115123740-9e1d0d53df68/go.mod h1:Xk+z4oIWdQqJzsxyjgl3P22oYZnHdZ8FFTHAQQt5BMQ=
github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
github.com/muesli/termenv v0.11.1-0.20220204035834-5ac8409525e0/go.mod h1:Bd5NYQ7pd+SrtBSrSNoBBmXlcY8+Xj4BMJgh8qcZrvs=
github.com/muesli/termenv v0.13.0/go.mod h1:sP1+uffeLaEYpyOTb8pLCUctGcGLnoFjSn4YJK5e2bc=
github.com/muesli/termenv v0.14.0/go.mod h1:kG/pF1E7fh949Xhe156crRUrHNyK221IuGO7Ez60Uc8=
github.com/muesli/termenv v0.15.1 h1:UzuTb/+hhlBugQz28rpzey4ZuKcZ03MeKsoG7IJZIxs=
github.com/muesli/termenv v0.15.1/go.mod h1:HeAQPTzpfs016yGtA4g00CsdYnVLJvxsS4ANqrZs2sQ=
github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sahilm/fuzzy v0.1.0/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
github.com/tinylib/msgp v1.1.6 h1:i+SbKraHhnrf9M5MYmvQhFnbLhAXSDWF8WWsuyRdocw=
github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
github.com/urfave/cli/v2 v2.25.0 h1:ykdZKuQey2zq0yin/l7JOm9Mh+pg72ngYMeB0ABn6q8=
github.com/urfave/cli/v2 v2.25.0/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.44.0 h1:R+gLUhldIsfg1HokMuQjdQ5bh9nuXHPIfvkYUu9eR5Q=
github.com/valyala/fasthttp v1.44.0/go.mod h1:f6VbjjoI3z1NDOZOv17o6RvtRSWxC77seBFc2uWtgiY=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220204135822-1c1b9b1eba6a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

index.html (120 lines)

@@ -1,120 +0,0 @@
<!DOCTYPE html>
<html>
<head>
  <title>llama-cli</title>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css" crossorigin="anonymous" referrerpolicy="no-referrer" />
  <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css">
</head>
<style>
  @keyframes rotating {
    from {
      transform: rotate(0deg);
    }
    to {
      transform: rotate(360deg);
    }
  }

  .waiting {
    animation: rotating 1s linear infinite;
  }
</style>
<body>

<div class="container mt-5" x-data="{ templates:[
  {
    name: 'Alpaca: Instruction without input',
    text: `Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{{.Instruction}}

### Response:`,
  },
  {
    name: 'Alpaca: Instruction with input',
    text: `Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{{.Instruction}}

### Input:
{{.Input}}

### Response:`,
  }
], selectedTemplate: '', selectedTemplateText: '' }">
  <h1>llama-cli API</h1>
  <div class="form-group">
    <label for="inputText">Input Text:</label>
    <textarea class="form-control" id="inputText" rows="6" placeholder="Your text input here..." x-text="selectedTemplateText"></textarea>
  </div>
  <div class="form-group">
    <label for="templateSelect">Select Template:</label>
    <select class="form-control" id="templateSelect" x-model="selectedTemplateText">
      <option value="">None</option>
      <template x-for="(template, index) in templates" :key="index">
        <option :value="template.text" x-text="template.name"></option>
      </template>
    </select>
  </div>
  <div class="form-group">
    <label for="topP">Top P:</label>
    <input type="range" step="0.01" min="0" max="1" class="form-control" id="topP" value="0.20" name="topP" onchange="this.nextElementSibling.value = this.value" required>
    <output>0.20</output>
  </div>
  <div class="form-group">
    <label for="topK">Top K:</label>
    <input type="number" class="form-control" id="topK" value="10000" name="topK" required>
  </div>
  <div class="form-group">
    <label for="temperature">Temperature:</label>
    <input type="range" step="0.01" min="0" max="1" value="0.9" class="form-control" id="temperature" name="temperature" onchange="this.nextElementSibling.value = this.value" required>
    <output>0.9</output>
  </div>
  <div class="form-group">
    <label for="tokens">Tokens:</label>
    <input type="number" class="form-control" id="tokens" name="tokens" value="128" required>
  </div>
  <button class="btn btn-primary" x-on:click="submitRequest()">Submit <i class="fas fa-paper-plane"></i></button>
  <hr>
  <div class="form-group">
    <label for="outputText">Output Text:</label>
    <textarea class="form-control" id="outputText" rows="5" readonly></textarea>
  </div>
</div>

<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
<script>
  function submitRequest() {
    var button = document.querySelector("i.fa-paper-plane");
    button.classList.add("waiting");
    var text = document.getElementById("inputText").value;
    var url = "/predict";
    var data = {
      "text": text,
      "topP": document.getElementById("topP").value,
      "topK": document.getElementById("topK").value,
      "temperature": document.getElementById("temperature").value,
      "tokens": document.getElementById("tokens").value
    };
    fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify(data)
    })
    .then(response => response.json())
    .then(data => {
      document.getElementById("outputText").value = data.prediction;
      button.classList.remove("waiting");
    })
    .catch(error => { console.error(error); button.classList.remove("waiting"); });
  }
</script>
</body>
</html>

interactive.go (142 lines)

@@ -1,142 +0,0 @@
package main

// A simple program demonstrating the text area component from the Bubbles
// component library.

import (
	"fmt"
	"strings"

	"github.com/charmbracelet/bubbles/textarea"
	"github.com/charmbracelet/bubbles/viewport"
	tea "github.com/charmbracelet/bubbletea"
	"github.com/charmbracelet/lipgloss"
	llama "github.com/go-skynet/llama/go"
)

func startInteractive(l *llama.LLama, opts ...llama.PredictOption) error {
	p := tea.NewProgram(initialModel(l, opts...))

	_, err := p.Run()
	return err
}

type (
	errMsg error
)

type model struct {
	viewport    viewport.Model
	messages    *[]string
	textarea    textarea.Model
	senderStyle lipgloss.Style
	err         error
	l           *llama.LLama
	opts        []llama.PredictOption

	predictC chan string
}

func initialModel(l *llama.LLama, opts ...llama.PredictOption) model {
	ta := textarea.New()
	ta.Placeholder = "Send a message..."
	ta.Focus()

	ta.Prompt = "┃ "
	ta.CharLimit = 280

	ta.SetWidth(200)
	ta.SetHeight(3)

	// Remove cursor line styling
	ta.FocusedStyle.CursorLine = lipgloss.NewStyle()

	ta.ShowLineNumbers = false

	vp := viewport.New(200, 5)
	vp.SetContent(`Welcome to llama-cli. Type a message and press Enter to send. Alpaca doesn't keep context of the whole chat (yet).`)

	ta.KeyMap.InsertNewline.SetEnabled(false)

	predictChannel := make(chan string)
	messages := []string{}
	m := model{
		textarea:    ta,
		messages:    &messages,
		viewport:    vp,
		senderStyle: lipgloss.NewStyle().Foreground(lipgloss.Color("5")),
		err:         nil,
		l:           l,
		opts:        opts,
		predictC:    predictChannel,
	}
	go func() {
		for p := range predictChannel {
			str, _ := templateString(emptyInput, struct {
				Instruction string
				Input       string
			}{Instruction: p})
			res, _ := l.Predict(
				str,
				opts...,
			)

			mm := *m.messages
			*m.messages = mm[:len(mm)-1]
			*m.messages = append(*m.messages, m.senderStyle.Render("llama: ")+res)
			m.viewport.SetContent(strings.Join(*m.messages, "\n"))
			ta.Reset()
			m.viewport.GotoBottom()
		}
	}()

	return m
}

func (m model) Init() tea.Cmd {
	return textarea.Blink
}

func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
	var (
		tiCmd tea.Cmd
		vpCmd tea.Cmd
	)

	m.textarea, tiCmd = m.textarea.Update(msg)
	m.viewport, vpCmd = m.viewport.Update(msg)

	switch msg := msg.(type) {
	case tea.WindowSizeMsg:

		// m.viewport.Width = msg.Width
		// m.viewport.Height = msg.Height
	case tea.KeyMsg:
		switch msg.Type {
		case tea.KeyCtrlC, tea.KeyEsc:
			fmt.Println(m.textarea.Value())
			return m, tea.Quit
		case tea.KeyEnter:
			*m.messages = append(*m.messages, m.senderStyle.Render("You: ")+m.textarea.Value(), m.senderStyle.Render("Loading response..."))
			m.predictC <- m.textarea.Value()
			m.viewport.SetContent(strings.Join(*m.messages, "\n"))
			m.textarea.Reset()
			m.viewport.GotoBottom()
		}

	// We handle errors just like any other message
	case errMsg:
		m.err = msg
		return m, nil
	}

	return m, tea.Batch(tiCmd, vpCmd)
}

func (m model) View() string {
	return fmt.Sprintf(
		"%s\n\n%s",
		m.viewport.View(),
		m.textarea.View(),
	) + "\n\n"
}

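interactive.go hands each prompt from the Bubble Tea update loop to a background goroutine over the `predictC` channel, so the UI stays responsive while the model call blocks. A stripped-down, runnable sketch of that pattern (`fakePredict` is a stand-in for the real `Predict` call):

```golang
package main

import "fmt"

// fakePredict stands in for the blocking llama Predict call.
func fakePredict(prompt string) string {
	return "response to: " + prompt
}

func main() {
	predictC := make(chan string)
	done := make(chan struct{})

	// Worker goroutine: consumes prompts, runs the slow prediction,
	// and publishes results, mirroring the loop in initialModel.
	go func() {
		for p := range predictC {
			fmt.Println("llama:", fakePredict(p))
		}
		close(done)
	}()

	predictC <- "What's an alpaca?"
	close(predictC)
	<-done
}
```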
kubernetes/deployment.yaml (42 lines)

@@ -1,42 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
  name: llama
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama
  namespace: llama
  labels:
    app: llama
spec:
  selector:
    matchLabels:
      app: llama
  replicas: 1
  template:
    metadata:
      labels:
        app: llama
      name: llama
    spec:
      containers:
        - name: llama
          args:
            - api
          image: quay.io/go-skynet/llama-cli:v0.3
---
apiVersion: v1
kind: Service
metadata:
  name: llama
  namespace: llama
spec:
  selector:
    app: llama
  type: LoadBalancer
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 8080

main.go (279 lines)

@@ -1,279 +0,0 @@
package main

import (
	"bytes"
	"fmt"
	"io/ioutil"
	"os"
	"runtime"
	"text/template"

	llama "github.com/go-skynet/llama/go"
	"github.com/urfave/cli/v2"
)

// Define the template string
var emptyInput string = `Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{{.Instruction}}

### Response:`

var nonEmptyInput string = `Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{{.Instruction}}

### Input:
{{.Input}}

### Response:
`

func llamaFromOptions(ctx *cli.Context) (*llama.LLama, error) {
	opts := []llama.ModelOption{llama.SetContext(ctx.Int("context-size"))}
	if ctx.Bool("alpaca") {
		opts = append(opts, llama.EnableAlpaca)
	}
	if ctx.Bool("gpt4all") {
		opts = append(opts, llama.EnableGPT4All)
	}
	return llama.New(ctx.String("model"), opts...)
}

func templateString(t string, in interface{}) (string, error) {
	// Parse the template
	tmpl, err := template.New("prompt").Parse(t)
	if err != nil {
		return "", err
	}

	var buf bytes.Buffer
	err = tmpl.Execute(&buf, in)
	if err != nil {
		return "", err
	}
	return buf.String(), nil
}

var modelFlags = []cli.Flag{
	&cli.StringFlag{
		Name:    "model",
		EnvVars: []string{"MODEL_PATH"},
	},
	&cli.IntFlag{
		Name:    "tokens",
		EnvVars: []string{"TOKENS"},
		Value:   128,
	},
	&cli.IntFlag{
		Name:    "context-size",
		EnvVars: []string{"CONTEXT_SIZE"},
		Value:   512,
	},
	&cli.IntFlag{
		Name:    "threads",
		EnvVars: []string{"THREADS"},
		Value:   runtime.NumCPU(),
	},
	&cli.Float64Flag{
		Name:    "temperature",
		EnvVars: []string{"TEMPERATURE"},
		Value:   0.95,
	},
	&cli.Float64Flag{
		Name:    "topp",
		EnvVars: []string{"TOP_P"},
		Value:   0.85,
	},
	&cli.IntFlag{
		Name:    "topk",
		EnvVars: []string{"TOP_K"},
		Value:   20,
	},
	&cli.BoolFlag{
		Name:    "alpaca",
		EnvVars: []string{"ALPACA"},
		Value:   true,
	},
	&cli.BoolFlag{
		Name:    "gpt4all",
		EnvVars: []string{"GPT4ALL"},
		Value:   false,
	},
}

func main() {
	app := &cli.App{
		Name:    "llama-cli",
		Version: "0.1",
		Usage:   "llama-cli --model ... --instruction 'What is an alpaca?'",
		Flags: append(modelFlags,
			&cli.StringFlag{
				Name:    "template",
				EnvVars: []string{"TEMPLATE"},
			},
			&cli.StringFlag{
				Name:    "instruction",
				EnvVars: []string{"INSTRUCTION"},
			},
			&cli.StringFlag{
				Name:    "input",
				EnvVars: []string{"INPUT"},
			}),
		Description: `Run llama.cpp inference`,
		UsageText: `
llama-cli --model ~/ggml-alpaca-7b-q4.bin --instruction "What's an alpaca?"

An Alpaca (Vicugna pacos) is a domesticated species of South American camelid, related to llamas and originally from Peru but now found throughout much of Andean region. They are bred for their fleeces which can be spun into wool or knitted items such as hats, sweaters, blankets etc

echo "An Alpaca (Vicugna pacos) is a domesticated species of South American camelid, related to llamas and originally from Peru but now found throughout much of Andean region. They are bred for their fleeces which can be spun into wool or knitted items such as hats, sweaters, blankets etc" | llama-cli --model ~/ggml-alpaca-7b-q4.bin --instruction "Proofread, improving clarity and flow" --input "-"

An Alpaca (Vicugna pacos) is a domesticated species from South America that's related to llamas. Originating in Peru but now found throughout the Andean region, they are bred for their fleeces which can be spun into wool or knitted items such as hats and sweaters—blankets too!
`,
		Copyright: "go-skynet authors",
		Commands: []*cli.Command{
			{
				Flags: modelFlags,
				Name:  "interactive",
				Action: func(ctx *cli.Context) error {

					l, err := llamaFromOptions(ctx)
					if err != nil {
						fmt.Println("Loading the model failed:", err.Error())
						os.Exit(1)
					}

					return startInteractive(l, llama.SetTemperature(ctx.Float64("temperature")),
						llama.SetTopP(ctx.Float64("topp")),
						llama.SetTopK(ctx.Int("topk")),
						llama.SetTokens(ctx.Int("tokens")),
						llama.SetThreads(ctx.Int("threads")))
				},
			},
			{

				Name: "api",
				Flags: []cli.Flag{
					&cli.IntFlag{
						Name:    "threads",
						EnvVars: []string{"THREADS"},
						Value:   runtime.NumCPU(),
					},
					&cli.StringFlag{
						Name:    "model",
						EnvVars: []string{"MODEL_PATH"},
					},
					&cli.StringFlag{
						Name:    "address",
						EnvVars: []string{"ADDRESS"},
						Value:   ":8080",
					},
					&cli.BoolFlag{
						Name:    "alpaca",
						EnvVars: []string{"ALPACA"},
						Value:   true,
					},
					&cli.BoolFlag{
						Name:    "gpt4all",
						EnvVars: []string{"GPT4ALL"},
						Value:   false,
					},
					&cli.IntFlag{
						Name:    "context-size",
						EnvVars: []string{"CONTEXT_SIZE"},
						Value:   512,
					},
				},
				Action: func(ctx *cli.Context) error {
					l, err := llamaFromOptions(ctx)
					if err != nil {
						fmt.Println("Loading the model failed:", err.Error())
						os.Exit(1)
					}

					return api(l, ctx.String("address"), ctx.Int("threads"))
				},
			},
		},
		Action: func(ctx *cli.Context) error {

			instruction := ctx.String("instruction")
			input := ctx.String("input")
			templ := ctx.String("template")

			promptTemplate := ""

			if input != "" {
				promptTemplate = nonEmptyInput
			} else {
				promptTemplate = emptyInput
			}

			if templ != "" {
				dat, err := os.ReadFile(templ)
				if err != nil {
					fmt.Printf("Failed reading file: %s", err.Error())
					os.Exit(1)
				}
				promptTemplate = string(dat)
			}

			if instruction == "-" {
				dat, err := ioutil.ReadAll(os.Stdin)
				if err != nil {
					fmt.Printf("reading stdin failed: %s", err)
					os.Exit(1)
				}
				instruction = string(dat)
			}

			if input == "-" {
				dat, err := ioutil.ReadAll(os.Stdin)
				if err != nil {
					fmt.Printf("reading stdin failed: %s", err)
					os.Exit(1)
				}
				input = string(dat)
			}

			str, err := templateString(promptTemplate, struct {
				Instruction string
				Input       string
			}{Instruction: instruction, Input: input})

			if err != nil {
				fmt.Println("Templating the input failed:", err.Error())
				os.Exit(1)
			}

			l, err := llamaFromOptions(ctx)
			if err != nil {
				fmt.Println("Loading the model failed:", err.Error())
				os.Exit(1)
			}

			res, err := l.Predict(
				str,
				llama.SetTemperature(ctx.Float64("temperature")),
				llama.SetTopP(ctx.Float64("topp")),
				llama.SetTopK(ctx.Int("topk")),
				llama.SetTokens(ctx.Int("tokens")),
				llama.SetThreads(ctx.Int("threads")),
			)
			if err != nil {
				fmt.Printf("predicting failed: %s", err)
				os.Exit(1)
			}
			fmt.Println(res)
			return nil
		},
	}

	err := app.Run(os.Args)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
}