mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 11:13:31 -05:00
Compare commits
1 Commits
v2.26.0
...
speculativ
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b16a01d0bd |
23
.bruno/LocalAI Test Requests/Sound Generation/musicgen.bru
Normal file
23
.bruno/LocalAI Test Requests/Sound Generation/musicgen.bru
Normal file
@@ -0,0 +1,23 @@
|
||||
meta {
|
||||
name: musicgen
|
||||
type: http
|
||||
seq: 1
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/sound-generation
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model_id": "facebook/musicgen-small",
|
||||
"text": "Exciting 80s Newscast Interstitial",
|
||||
"duration_seconds": 8
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
meta {
|
||||
name: backend monitor
|
||||
type: http
|
||||
seq: 4
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/monitor
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
meta {
|
||||
name: backend-shutdown
|
||||
type: http
|
||||
seq: 3
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/shutdown
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}"
|
||||
}
|
||||
}
|
||||
5
.bruno/LocalAI Test Requests/bruno.json
Normal file
5
.bruno/LocalAI Test Requests/bruno.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"version": "1",
|
||||
"name": "LocalAI Test Requests",
|
||||
"type": "collection"
|
||||
}
|
||||
6
.bruno/LocalAI Test Requests/environments/localhost.bru
Normal file
6
.bruno/LocalAI Test Requests/environments/localhost.bru
Normal file
@@ -0,0 +1,6 @@
|
||||
vars {
|
||||
HOST: localhost
|
||||
PORT: 8080
|
||||
DEFAULT_MODEL: gpt-3.5-turbo
|
||||
PROTOCOL: http://
|
||||
}
|
||||
11
.bruno/LocalAI Test Requests/get models list.bru
Normal file
11
.bruno/LocalAI Test Requests/get models list.bru
Normal file
@@ -0,0 +1,11 @@
|
||||
meta {
|
||||
name: get models list
|
||||
type: http
|
||||
seq: 2
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
meta {
|
||||
name: Generate image
|
||||
type: http
|
||||
seq: 1
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/images/generations
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"prompt": "<positive prompt>|<negative prompt>",
|
||||
"model": "model-name",
|
||||
"step": 51,
|
||||
"size": "1024x1024",
|
||||
"image": ""
|
||||
}
|
||||
}
|
||||
24
.bruno/LocalAI Test Requests/llm text/-completions.bru
Normal file
24
.bruno/LocalAI Test Requests/llm text/-completions.bru
Normal file
@@ -0,0 +1,24 @@
|
||||
meta {
|
||||
name: -completions
|
||||
type: http
|
||||
seq: 4
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"prompt": "function downloadFile(string url, string outputPath) {",
|
||||
"max_tokens": 256,
|
||||
"temperature": 0.5
|
||||
}
|
||||
}
|
||||
23
.bruno/LocalAI Test Requests/llm text/-edits.bru
Normal file
23
.bruno/LocalAI Test Requests/llm text/-edits.bru
Normal file
@@ -0,0 +1,23 @@
|
||||
meta {
|
||||
name: -edits
|
||||
type: http
|
||||
seq: 5
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/edits
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"input": "What day of the wek is it?",
|
||||
"instruction": "Fix the spelling mistakes"
|
||||
}
|
||||
}
|
||||
22
.bruno/LocalAI Test Requests/llm text/-embeddings.bru
Normal file
22
.bruno/LocalAI Test Requests/llm text/-embeddings.bru
Normal file
@@ -0,0 +1,22 @@
|
||||
meta {
|
||||
name: -embeddings
|
||||
type: http
|
||||
seq: 6
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/embeddings
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
meta {
|
||||
name: chat completion -simple- 1 message-
|
||||
type: http
|
||||
seq: 4
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "How could one use friction to cook an egg?"
|
||||
}
|
||||
],
|
||||
"max_tokens": 256,
|
||||
"temperature": 0.2,
|
||||
"grammar": ""
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
meta {
|
||||
name: chat-completions -long-
|
||||
type: http
|
||||
seq: 5
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"messages": [{"role": "system", "content": "You are a helpful, intelligent pirate assistant. Always answer as helpfully as possible, while being a pirate and using pirate language. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."},
|
||||
{"role": "user", "content": "How could one use electricity to cook an egg?"},
|
||||
{"role": "assistant",
|
||||
"content": "Shiver me timbers! Using electricity to cook an egg? Well, matey, I reckon that be a right curious idea! *adjusts spectacles* Now, I ain't sure if this be possible, but I'll do me best to help ye out.\n\nFirst things first, ye gotta understand that electricity be a powerful force, and it can't just cook an egg on its own. Ye see, electricity be like a mighty wind that can make things happen, but it needs somethin' to work with. So, if ye want to use electricity to cook an egg, ye gotta find a way to harness that power and make it do the cookin'.\n\nNow, I know what ye might be thinkin': \"How do I harness the power of electricity to cook an egg?\" Well, matey, there be a few ways to do it. One way be to use a special device called an \"electric frying pan.\" This be a pan that has a built-in heating element that gets hot when ye plug it into a wall socket. When the element gets hot, ye can crack an egg into the pan and watch as it cook"
|
||||
},
|
||||
{"role": "user", "content": "I don't have one of those, just a raw wire and plenty of power! How do we get it done?"}],
|
||||
"max_tokens": 1024,
|
||||
"temperature": 0.5
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
meta {
|
||||
name: chat-completions -stream-
|
||||
type: http
|
||||
seq: 6
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"messages": [{"role": "user", "content": "Explain how I can set sail on the ocean using only power generated by seagulls?"}],
|
||||
"max_tokens": 256,
|
||||
"temperature": 0.9,
|
||||
"stream": true
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
meta {
|
||||
name: add model gallery
|
||||
type: http
|
||||
seq: 10
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"url": "file:///home/dave/projects/model-gallery/huggingface/TheBloke__CodeLlama-7B-Instruct-GGML.yaml",
|
||||
"name": "test"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
meta {
|
||||
name: delete model gallery
|
||||
type: http
|
||||
seq: 11
|
||||
}
|
||||
|
||||
delete {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"name": "test"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
meta {
|
||||
name: list MODELS in galleries
|
||||
type: http
|
||||
seq: 7
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/available
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
meta {
|
||||
name: list model GALLERIES
|
||||
type: http
|
||||
seq: 8
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
11
.bruno/LocalAI Test Requests/model gallery/model delete.bru
Normal file
11
.bruno/LocalAI Test Requests/model gallery/model delete.bru
Normal file
@@ -0,0 +1,11 @@
|
||||
meta {
|
||||
name: model delete
|
||||
type: http
|
||||
seq: 7
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
meta {
|
||||
name: model gallery apply -gist-
|
||||
type: http
|
||||
seq: 12
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"id": "TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q2_K.bin"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
meta {
|
||||
name: model gallery apply
|
||||
type: http
|
||||
seq: 9
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"id": "dave@TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q3_K_S.bin",
|
||||
"name": "codellama7b"
|
||||
}
|
||||
}
|
||||
BIN
.bruno/LocalAI Test Requests/transcription/gb1.ogg
Normal file
BIN
.bruno/LocalAI Test Requests/transcription/gb1.ogg
Normal file
Binary file not shown.
16
.bruno/LocalAI Test Requests/transcription/transcribe.bru
Normal file
16
.bruno/LocalAI Test Requests/transcription/transcribe.bru
Normal file
@@ -0,0 +1,16 @@
|
||||
meta {
|
||||
name: transcribe
|
||||
type: http
|
||||
seq: 1
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
|
||||
body: multipartForm
|
||||
auth: none
|
||||
}
|
||||
|
||||
body:multipart-form {
|
||||
file: @file(transcription/gb1.ogg)
|
||||
model: whisper-1
|
||||
}
|
||||
22
.bruno/LocalAI Test Requests/tts/-tts.bru
Normal file
22
.bruno/LocalAI Test Requests/tts/-tts.bru
Normal file
@@ -0,0 +1,22 @@
|
||||
meta {
|
||||
name: -tts
|
||||
type: http
|
||||
seq: 2
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||
}
|
||||
}
|
||||
23
.bruno/LocalAI Test Requests/tts/musicgen.bru
Normal file
23
.bruno/LocalAI Test Requests/tts/musicgen.bru
Normal file
@@ -0,0 +1,23 @@
|
||||
meta {
|
||||
name: musicgen
|
||||
type: http
|
||||
seq: 2
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"backend": "transformers",
|
||||
"model": "facebook/musicgen-small",
|
||||
"input": "80s Synths playing Jazz"
|
||||
}
|
||||
}
|
||||
2
.github/labeler.yml
vendored
2
.github/labeler.yml
vendored
@@ -1,4 +1,4 @@
|
||||
enhancement:
|
||||
enhancements:
|
||||
- head-branch: ['^feature', 'feature']
|
||||
|
||||
dependencies:
|
||||
|
||||
2
.github/workflows/dependabot_auto.yml
vendored
2
.github/workflows/dependabot_auto.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
steps:
|
||||
- name: Dependabot metadata
|
||||
id: metadata
|
||||
uses: dependabot/fetch-metadata@v2.3.0
|
||||
uses: dependabot/fetch-metadata@v2.2.0
|
||||
with:
|
||||
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||
skip-commit-verification: true
|
||||
|
||||
9
.github/workflows/generate_grpc_cache.yaml
vendored
9
.github/workflows/generate_grpc_cache.yaml
vendored
@@ -2,10 +2,9 @@ name: 'generate and publish GRPC docker caches'
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
schedule:
|
||||
# daily at midnight
|
||||
- cron: '0 0 * * *'
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
concurrency:
|
||||
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||
@@ -17,7 +16,7 @@ jobs:
|
||||
matrix:
|
||||
include:
|
||||
- grpc-base-image: ubuntu:22.04
|
||||
runs-on: 'arc-runner-set'
|
||||
runs-on: 'ubuntu-latest'
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
runs-on: ${{matrix.runs-on}}
|
||||
steps:
|
||||
|
||||
4
.github/workflows/notify-models.yaml
vendored
4
.github/workflows/notify-models.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
with:
|
||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||
# Check the PR diff using the current branch and the base branch of the PR
|
||||
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
||||
id: git-diff-action
|
||||
with:
|
||||
json_diff_file_output: diff.json
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
||||
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
||||
# Check the PR diff using the current branch and the base branch of the PR
|
||||
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
||||
id: git-diff-action
|
||||
with:
|
||||
json_diff_file_output: diff.json
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023-2025 Ettore Di Giacinto (mudler@localai.io)
|
||||
Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
44
Makefile
44
Makefile
@@ -6,7 +6,9 @@ BINARY_NAME=local-ai
|
||||
DETECT_LIBS?=true
|
||||
|
||||
# llama.cpp versions
|
||||
CPPLLAMA_VERSION?=300907b2110cc17b4337334dc397e05de2d8f5e0
|
||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=6152129d05870cb38162c422c6ba80434e021e9f
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||
@@ -22,7 +24,7 @@ BARKCPP_VERSION?=v1.0.0
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=d46ed5e184b97c2018dc2e8105925bdb8775e02c
|
||||
STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024
|
||||
|
||||
ONNX_VERSION?=1.20.0
|
||||
ONNX_ARCH?=x64
|
||||
@@ -149,6 +151,7 @@ ifeq ($(BUILD_TYPE),hipblas)
|
||||
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
# llama-ggml has no hipblas support, so override it here.
|
||||
export STABLE_BUILD_TYPE=
|
||||
export GGML_HIP=1
|
||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
||||
@@ -185,6 +188,7 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||
@@ -218,6 +222,19 @@ endif
|
||||
|
||||
all: help
|
||||
|
||||
## go-llama.cpp
|
||||
sources/go-llama.cpp:
|
||||
mkdir -p sources/go-llama.cpp
|
||||
cd sources/go-llama.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(GOLLAMA_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(GOLLAMA_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
|
||||
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||
|
||||
## bark.cpp
|
||||
sources/bark.cpp:
|
||||
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
||||
@@ -293,17 +310,19 @@ sources/whisper.cpp:
|
||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||
|
||||
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||
|
||||
dropreplace:
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
||||
|
||||
prepare-sources: get-sources replace
|
||||
$(GOCMD) mod download
|
||||
@@ -311,6 +330,7 @@ prepare-sources: get-sources replace
|
||||
## GENERIC
|
||||
rebuild: ## Rebuilds the project
|
||||
$(GOCMD) clean -cache
|
||||
$(MAKE) -C sources/go-llama.cpp clean
|
||||
$(MAKE) -C sources/whisper.cpp clean
|
||||
$(MAKE) -C sources/go-piper clean
|
||||
$(MAKE) build
|
||||
@@ -414,7 +434,7 @@ run: prepare ## run local-ai
|
||||
test-models/testmodel.ggml:
|
||||
mkdir test-models
|
||||
mkdir test-dir
|
||||
wget -q https://huggingface.co/RichardErkhov/Qwen_-_Qwen2-1.5B-Instruct-gguf/resolve/main/Qwen2-1.5B-Instruct.Q2_K.gguf -O test-models/testmodel.ggml
|
||||
wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
|
||||
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||
@@ -429,7 +449,8 @@ test: prepare test-models/testmodel.ggml grpcs
|
||||
export GO_TAGS="tts debug"
|
||||
$(MAKE) prepare-test
|
||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||
$(MAKE) test-llama
|
||||
$(MAKE) test-llama-gguf
|
||||
$(MAKE) test-tts
|
||||
$(MAKE) test-stablediffusion
|
||||
@@ -458,6 +479,10 @@ teardown-e2e:
|
||||
rm -rf $(TEST_DIR) || true
|
||||
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||
|
||||
test-llama: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||
|
||||
test-llama-gguf: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||
@@ -735,6 +760,13 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
||||
mkdir -p backend-assets/util/
|
||||
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
||||
|
||||
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/llama-ggml
|
||||
endif
|
||||
|
||||
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
|
||||
@@ -829,7 +861,7 @@ swagger:
|
||||
|
||||
.PHONY: gen-assets
|
||||
gen-assets:
|
||||
$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets
|
||||
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
|
||||
|
||||
## Documentation
|
||||
docs/layouts/_default:
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
backend: silero-vad
|
||||
name: silero-vad
|
||||
parameters:
|
||||
model: silero-vad.onnx
|
||||
download_files:
|
||||
- filename: silero-vad.onnx
|
||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||
@@ -129,7 +129,7 @@ detect_gpu
|
||||
detect_gpu_size
|
||||
|
||||
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
|
||||
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"
|
||||
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
|
||||
|
||||
check_vars
|
||||
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
backend: silero-vad
|
||||
name: silero-vad
|
||||
parameters:
|
||||
model: silero-vad.onnx
|
||||
download_files:
|
||||
- filename: silero-vad.onnx
|
||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||
@@ -1,8 +0,0 @@
|
||||
backend: silero-vad
|
||||
name: silero-vad
|
||||
parameters:
|
||||
model: silero-vad.onnx
|
||||
download_files:
|
||||
- filename: silero-vad.onnx
|
||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||
@@ -163,11 +163,6 @@ message Reply {
|
||||
double timing_token_generation = 5;
|
||||
}
|
||||
|
||||
message GrammarTrigger {
|
||||
string word = 1;
|
||||
bool at_start = 2;
|
||||
}
|
||||
|
||||
message ModelOptions {
|
||||
string Model = 1;
|
||||
int32 ContextSize = 2;
|
||||
@@ -252,8 +247,6 @@ message ModelOptions {
|
||||
|
||||
string CacheTypeKey = 63;
|
||||
string CacheTypeValue = 64;
|
||||
|
||||
repeated GrammarTrigger GrammarTriggers = 65;
|
||||
}
|
||||
|
||||
message Result {
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "backend.grpc.pb.h"
|
||||
#include "utils.hpp"
|
||||
#include "sampling.h"
|
||||
#include "speculative.h"
|
||||
// include std::regex
|
||||
#include <cstddef>
|
||||
#include <thread>
|
||||
@@ -185,12 +186,45 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<com
|
||||
return out;
|
||||
}
|
||||
|
||||
struct llama_slot_params {
|
||||
uint32_t seed = -1; // RNG seed
|
||||
bool stream = true;
|
||||
bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt
|
||||
bool return_tokens = false;
|
||||
|
||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||
int32_t n_discard = 0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
|
||||
int32_t n_predict = -1; // new tokens to predict
|
||||
int32_t n_indent = 0; // mininum line indentation for the generated text in number of whitespace characters
|
||||
|
||||
int64_t t_max_prompt_ms = -1; // TODO: implement
|
||||
int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit
|
||||
|
||||
std::vector<common_adapter_lora_info> lora;
|
||||
|
||||
std::vector<std::string> antiprompt;
|
||||
std::vector<std::string> response_fields;
|
||||
bool timings_per_token = false;
|
||||
bool post_sampling_probs = false;
|
||||
bool ignore_eos = false;
|
||||
|
||||
json input_prefix;
|
||||
json input_suffix;
|
||||
|
||||
struct common_params_sampling sampling;
|
||||
struct common_params_speculative speculative;
|
||||
};
|
||||
|
||||
|
||||
struct llama_client_slot
|
||||
{
|
||||
int id;
|
||||
int task_id = -1;
|
||||
|
||||
struct slot_params params;
|
||||
struct llama_slot_params params;
|
||||
common_speculative * spec = nullptr;
|
||||
llama_batch batch_spec = {};
|
||||
|
||||
|
||||
slot_state state = IDLE;
|
||||
slot_command command = NONE;
|
||||
@@ -283,6 +317,7 @@ struct llama_client_slot
|
||||
images.clear();
|
||||
}
|
||||
|
||||
|
||||
bool has_budget(common_params &global_params) {
|
||||
if (params.n_predict == -1 && global_params.n_predict == -1)
|
||||
{
|
||||
@@ -454,6 +489,10 @@ struct llama_server_context
|
||||
{
|
||||
llama_model *model = nullptr;
|
||||
llama_context *ctx = nullptr;
|
||||
common_init_result llama_init_dft;
|
||||
llama_context * ctx_dft = nullptr;
|
||||
llama_model * model_dft = nullptr;
|
||||
llama_context_params cparams_dft;
|
||||
const llama_vocab * vocab = nullptr;
|
||||
|
||||
clip_ctx *clp_ctx = nullptr;
|
||||
@@ -468,9 +507,6 @@ struct llama_server_context
|
||||
bool add_bos_token = true;
|
||||
bool has_eos_token = true;
|
||||
|
||||
bool grammar_lazy = false;
|
||||
std::vector<common_grammar_trigger> grammar_trigger_words;
|
||||
|
||||
int32_t n_ctx; // total context for all clients / slots
|
||||
|
||||
// system prompt
|
||||
@@ -505,6 +541,7 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool load_model(const common_params ¶ms_)
|
||||
{
|
||||
params = params_;
|
||||
@@ -548,6 +585,45 @@ struct llama_server_context
|
||||
add_bos_token = llama_vocab_get_add_bos(vocab);
|
||||
has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
|
||||
|
||||
if (!params.speculative.model.empty()) {
|
||||
LOG("loading draft model '%s'\n", params.speculative.model.c_str());
|
||||
|
||||
auto params_dft = params;
|
||||
|
||||
params_dft.devices = params.speculative.devices;
|
||||
params_dft.model = params.speculative.model;
|
||||
params_dft.n_ctx = params.speculative.n_ctx == 0 ? params.n_ctx / params.n_parallel : params.speculative.n_ctx;
|
||||
params_dft.n_gpu_layers = params.speculative.n_gpu_layers;
|
||||
params_dft.n_parallel = 1;
|
||||
|
||||
llama_init_dft = common_init_from_params(params_dft);
|
||||
|
||||
model_dft = llama_init_dft.model.get();
|
||||
|
||||
if (model_dft == nullptr) {
|
||||
LOG("failed to load draft model, '%s'\n", params.speculative.model.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!common_speculative_are_compatible(ctx, llama_init_dft.context.get())) {
|
||||
LOG("the draft model '%s' is not compatible with the target model '%s'\n", params.speculative.model.c_str(), params.model.c_str());
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
const int n_ctx_dft = llama_n_ctx(llama_init_dft.context.get());
|
||||
|
||||
cparams_dft = common_context_params_to_llama(params_dft);
|
||||
cparams_dft.n_batch = n_ctx_dft;
|
||||
|
||||
// force F16 KV cache for the draft model for extra performance
|
||||
cparams_dft.type_k = GGML_TYPE_F16;
|
||||
cparams_dft.type_v = GGML_TYPE_F16;
|
||||
|
||||
// the context is not needed - we will create one for each slot
|
||||
llama_init_dft.context.reset();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -576,6 +652,22 @@ struct llama_server_context
|
||||
slot.n_ctx = n_ctx_slot;
|
||||
slot.n_predict = params.n_predict;
|
||||
|
||||
if (model_dft) {
|
||||
slot.batch_spec = llama_batch_init(params.speculative.n_max + 1, 0, 1);
|
||||
|
||||
ctx_dft = llama_init_from_model(model_dft, cparams_dft);
|
||||
if (ctx_dft == nullptr) {
|
||||
LOG("%s", "failed to create draft context\n");
|
||||
return;
|
||||
}
|
||||
|
||||
slot.spec = common_speculative_init(ctx_dft);
|
||||
if (slot.spec == nullptr) {
|
||||
LOG("%s", "failed to create speculator\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_INFO("new slot", {
|
||||
{"slot_id", slot.id},
|
||||
{"n_ctx_slot", slot.n_ctx}
|
||||
@@ -684,9 +776,11 @@ struct llama_server_context
|
||||
}
|
||||
|
||||
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
||||
slot_params default_params;
|
||||
llama_slot_params default_params;
|
||||
common_params_sampling default_sparams;
|
||||
|
||||
|
||||
default_sparams.speculative = params_base.speculative;
|
||||
|
||||
slot->params.stream = json_value(data, "stream", false);
|
||||
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
||||
slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
|
||||
@@ -709,8 +803,15 @@ struct llama_server_context
|
||||
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||
slot->sparams.grammar_trigger_words = grammar_trigger_words;
|
||||
slot->sparams.grammar_lazy = grammar_lazy;
|
||||
|
||||
|
||||
slot->sparams.speculative.n_min = json_value(data, "speculative.n_min", defaults.speculative.n_min);
|
||||
slot->sparams.speculative.n_max = json_value(data, "speculative.n_max", defaults.speculative.n_max);
|
||||
slot->sparams.speculative.p_min = json_value(data, "speculative.p_min", defaults.speculative.p_min);
|
||||
|
||||
slot->sparams.speculative.n_min = std::min(params.speculative.n_max, params.speculative.n_min);
|
||||
slot->sparams.speculative.n_min = std::max(params.speculative.n_min, 2);
|
||||
slot->sparams.speculative.n_max = std::max(params.speculative.n_max, 0);
|
||||
|
||||
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
|
||||
// Might be better to reject the request with a 400 ?
|
||||
@@ -1155,14 +1256,6 @@ struct llama_server_context
|
||||
slot.has_next_token = false;
|
||||
}
|
||||
|
||||
if (slot.n_past >= slot.n_ctx) {
|
||||
slot.truncated = true;
|
||||
slot.stopped_limit = true;
|
||||
slot.has_next_token = false;
|
||||
|
||||
LOG_VERBOSE("stopped due to running out of context capacity", {});
|
||||
}
|
||||
|
||||
if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok))
|
||||
{
|
||||
slot.stopped_eos = true;
|
||||
@@ -1635,17 +1728,17 @@ struct llama_server_context
|
||||
{
|
||||
if (slot.is_processing() && system_tokens.size() + slot.cache_tokens.size() >= (size_t) slot.n_ctx)
|
||||
{
|
||||
// this check is redundant (for good)
|
||||
// we should never get here, because generation should already stopped in process_token()
|
||||
|
||||
// START LOCALAI changes
|
||||
// Temporary disable context-shifting as it can lead to infinite loops (issue: https://github.com/ggerganov/llama.cpp/issues/3969)
|
||||
// See: https://github.com/mudler/LocalAI/issues/1333
|
||||
// Context is exhausted, release the slot
|
||||
slot.release();
|
||||
send_final_response(slot);
|
||||
slot.has_next_token = false;
|
||||
LOG_ERROR("context is exhausted, release the slot", {});
|
||||
slot.cache_tokens.clear();
|
||||
slot.n_past = 0;
|
||||
slot.truncated = false;
|
||||
slot.has_next_token = true;
|
||||
LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
||||
|
||||
continue;
|
||||
// END LOCALAI changes
|
||||
@@ -2037,6 +2130,97 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
|
||||
// do speculative decoding
|
||||
for (auto & slot : slots) {
|
||||
if (!slot.is_processing() || !(ctx_dft && params.speculative.n_max > 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (slot.state != PROCESSING) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// determine the max draft that fits the current slot state
|
||||
int n_draft_max = slot.params.speculative.n_max;
|
||||
|
||||
// note: n_past is not yet increased for the `id` token sampled above
|
||||
// also, need to leave space for 1 extra token to allow context shifts
|
||||
n_draft_max = std::min(n_draft_max, slot.n_ctx - slot.n_past - 2);
|
||||
|
||||
if (slot.n_remaining > 0) {
|
||||
n_draft_max = std::min(n_draft_max, slot.n_remaining - 1);
|
||||
}
|
||||
|
||||
LOG("max possible draft: %d\n", n_draft_max);
|
||||
|
||||
if (n_draft_max < slot.params.speculative.n_min) {
|
||||
LOG("the max possible draft is too small: %d < %d - skipping speculative decoding\n", n_draft_max, slot.params.speculative.n_min);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
llama_token id = slot.sampled;
|
||||
|
||||
struct common_speculative_params params_spec;
|
||||
params_spec.n_draft = n_draft_max;
|
||||
params_spec.n_reuse = llama_n_ctx(ctx_dft) - slot.params.speculative.n_max;
|
||||
params_spec.p_min = slot.params.speculative.p_min;
|
||||
|
||||
llama_tokens draft = common_speculative_gen_draft(slot.spec, params_spec, slot.cache_tokens, id);
|
||||
|
||||
// ignore small drafts
|
||||
if (slot.params.speculative.n_min > (int) draft.size()) {
|
||||
LOG("ignoring small draft: %d < %d\n", (int) draft.size(), slot.params.speculative.n_min);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// construct the speculation batch
|
||||
common_batch_clear(slot.batch_spec);
|
||||
common_batch_add (slot.batch_spec, id, slot.n_past, { slot.id }, true);
|
||||
|
||||
for (size_t i = 0; i < draft.size(); ++i) {
|
||||
common_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id }, true);
|
||||
}
|
||||
|
||||
LOG("decoding speculative batch, size = %d\n", slot.batch_spec.n_tokens);
|
||||
|
||||
llama_decode(ctx, slot.batch_spec);
|
||||
|
||||
// the accepted tokens from the speculation
|
||||
const auto ids = common_sampler_sample_and_accept_n(slot.ctx_sampling, ctx, draft);
|
||||
|
||||
slot.n_past += ids.size();
|
||||
slot.n_decoded += ids.size();
|
||||
|
||||
slot.cache_tokens.push_back(id);
|
||||
slot.cache_tokens.insert(slot.cache_tokens.end(), ids.begin(), ids.end() - 1);
|
||||
|
||||
llama_kv_cache_seq_rm(ctx, slot.id, slot.n_past, -1);
|
||||
|
||||
for (size_t i = 0; i < ids.size(); ++i) {
|
||||
completion_token_output result;
|
||||
|
||||
result.tok = ids[i];
|
||||
result.text_to_send = common_token_to_piece(ctx, result.tok, params.special);
|
||||
//result.prob = 1.0f; // set later
|
||||
|
||||
// TODO: set result.probs
|
||||
|
||||
if (!process_token(result, slot)) {
|
||||
// release slot because of stop condition
|
||||
slot.release();
|
||||
slot.print_timings();
|
||||
send_final_response(slot);
|
||||
metrics.on_prediction(slot);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
LOG("accepted %d/%d draft tokens, new n_past = %d\n", (int) ids.size() - 1, (int) draft.size(), slot.n_past);
|
||||
}
|
||||
|
||||
|
||||
LOG_VERBOSE("slots updated", {});
|
||||
return true;
|
||||
}
|
||||
@@ -2309,6 +2493,30 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
params.cpuparams.n_threads = request->threads();
|
||||
params.n_gpu_layers = request->ngpulayers();
|
||||
params.n_batch = request->nbatch();
|
||||
params.speculative.model = request->draftmodel();
|
||||
|
||||
// If options is not NULL, parse options
|
||||
for (int i = 0; request->options()[i] != NULL; i++) {
|
||||
char *optname = strtok(request->options()[i], ":");
|
||||
char *optval = strtok(NULL, ":");
|
||||
if (optval == NULL) {
|
||||
optval = "true";
|
||||
}
|
||||
|
||||
if (!strcmp(optname, "speculative.n_gpu_layers")) {
|
||||
params.speculative.n_gpu_layers = std::stoi(optval);
|
||||
}
|
||||
if (!strcmp(optname, "speculative.n_ctx")) {
|
||||
params.speculative.n_ctx = std::stoi(optval);
|
||||
}
|
||||
}
|
||||
|
||||
if params.speculative.n_gpu_layers == 0 {
|
||||
params.speculative.n_gpu_layers = params.n_gpu_layers;
|
||||
}
|
||||
if params.speculative.n_ctx == 0 {
|
||||
params.speculative.n_ctx = params.n_ctx;
|
||||
}
|
||||
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
|
||||
//params.n_parallel = 1;
|
||||
const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");
|
||||
@@ -2387,21 +2595,6 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
if ( request->ropefreqscale() != 0.0f ) {
|
||||
params.rope_freq_scale = request->ropefreqscale();
|
||||
}
|
||||
|
||||
if (request->grammartriggers_size() > 0) {
|
||||
LOG_INFO("configuring grammar triggers", {});
|
||||
llama.grammar_lazy = true;
|
||||
for (int i = 0; i < request->grammartriggers_size(); i++) {
|
||||
common_grammar_trigger trigger;
|
||||
trigger.word = request->grammartriggers(i).word();
|
||||
trigger.at_start = request->grammartriggers(i).at_start();
|
||||
llama.grammar_trigger_words.push_back(trigger);
|
||||
LOG_INFO("grammar trigger", {
|
||||
{ "word", trigger.word },
|
||||
{ "at_start", trigger.at_start }
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2550,18 +2743,6 @@ public:
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
|
||||
grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){
|
||||
json data = parse_options(false, request, llama);
|
||||
|
||||
std::vector<llama_token> tokens = llama.tokenize(data["prompt"],false);
|
||||
|
||||
for (int i=0 ; i< tokens.size(); i++){
|
||||
response->add_tokens(tokens[i]);
|
||||
}
|
||||
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
|
||||
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
|
||||
llama_client_slot* active_slot = llama.get_active_slot();
|
||||
|
||||
|
||||
204
backend/go/llm/llama-ggml/llama.go
Normal file
204
backend/go/llm/llama-ggml/llama.go
Normal file
@@ -0,0 +1,204 @@
|
||||
package main
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
base.SingleThread
|
||||
|
||||
llama *llama.LLama
|
||||
}
|
||||
|
||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||
ropeFreqBase := float32(10000)
|
||||
ropeFreqScale := float32(1)
|
||||
|
||||
if opts.RopeFreqBase != 0 {
|
||||
ropeFreqBase = opts.RopeFreqBase
|
||||
}
|
||||
if opts.RopeFreqScale != 0 {
|
||||
ropeFreqScale = opts.RopeFreqScale
|
||||
}
|
||||
|
||||
llamaOpts := []llama.ModelOption{
|
||||
llama.WithRopeFreqBase(ropeFreqBase),
|
||||
llama.WithRopeFreqScale(ropeFreqScale),
|
||||
}
|
||||
|
||||
if opts.NGQA != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
|
||||
}
|
||||
|
||||
if opts.RMSNormEps != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
|
||||
}
|
||||
|
||||
if opts.ContextSize != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
|
||||
}
|
||||
if opts.F16Memory {
|
||||
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
|
||||
}
|
||||
if opts.Embeddings {
|
||||
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
|
||||
}
|
||||
if opts.NGPULayers != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
|
||||
}
|
||||
|
||||
llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap))
|
||||
llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU))
|
||||
llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit))
|
||||
if opts.NBatch != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch)))
|
||||
} else {
|
||||
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
|
||||
}
|
||||
|
||||
if opts.NUMA {
|
||||
llamaOpts = append(llamaOpts, llama.EnableNUMA)
|
||||
}
|
||||
|
||||
if opts.LowVRAM {
|
||||
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
|
||||
}
|
||||
|
||||
model, err := llama.New(opts.ModelFile, llamaOpts...)
|
||||
llm.llama = model
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
|
||||
ropeFreqBase := float32(10000)
|
||||
ropeFreqScale := float32(1)
|
||||
|
||||
if opts.RopeFreqBase != 0 {
|
||||
ropeFreqBase = opts.RopeFreqBase
|
||||
}
|
||||
if opts.RopeFreqScale != 0 {
|
||||
ropeFreqScale = opts.RopeFreqScale
|
||||
}
|
||||
predictOptions := []llama.PredictOption{
|
||||
llama.SetTemperature(opts.Temperature),
|
||||
llama.SetTopP(opts.TopP),
|
||||
llama.SetTopK(int(opts.TopK)),
|
||||
llama.SetTokens(int(opts.Tokens)),
|
||||
llama.SetThreads(int(opts.Threads)),
|
||||
llama.WithGrammar(opts.Grammar),
|
||||
llama.SetRopeFreqBase(ropeFreqBase),
|
||||
llama.SetRopeFreqScale(ropeFreqScale),
|
||||
llama.SetNegativePromptScale(opts.NegativePromptScale),
|
||||
llama.SetNegativePrompt(opts.NegativePrompt),
|
||||
}
|
||||
|
||||
if opts.PromptCacheAll {
|
||||
predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
|
||||
}
|
||||
|
||||
if opts.PromptCacheRO {
|
||||
predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
|
||||
}
|
||||
|
||||
// Expected absolute path
|
||||
if opts.PromptCachePath != "" {
|
||||
predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
|
||||
}
|
||||
|
||||
if opts.Mirostat != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat)))
|
||||
}
|
||||
|
||||
if opts.MirostatETA != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
|
||||
}
|
||||
|
||||
if opts.MirostatTAU != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
|
||||
}
|
||||
|
||||
if opts.Debug {
|
||||
predictOptions = append(predictOptions, llama.Debug)
|
||||
}
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))
|
||||
|
||||
if opts.PresencePenalty != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
|
||||
}
|
||||
|
||||
if opts.NKeep != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep)))
|
||||
}
|
||||
|
||||
if opts.Batch != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch)))
|
||||
}
|
||||
|
||||
if opts.F16KV {
|
||||
predictOptions = append(predictOptions, llama.EnableF16KV)
|
||||
}
|
||||
|
||||
if opts.IgnoreEOS {
|
||||
predictOptions = append(predictOptions, llama.IgnoreEOS)
|
||||
}
|
||||
|
||||
if opts.Seed != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed)))
|
||||
}
|
||||
|
||||
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
|
||||
predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
|
||||
predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
|
||||
predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
|
||||
predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
|
||||
predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
|
||||
predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
|
||||
return predictOptions
|
||||
}
|
||||
|
||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
predictOptions := buildPredictOptions(opts)
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
|
||||
results <- token
|
||||
return true
|
||||
}))
|
||||
|
||||
go func() {
|
||||
_, err := llm.llama.Predict(opts.Prompt, predictOptions...)
|
||||
if err != nil {
|
||||
fmt.Println("err: ", err)
|
||||
}
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
predictOptions := buildPredictOptions(opts)
|
||||
|
||||
if len(opts.EmbeddingTokens) > 0 {
|
||||
tokens := []int{}
|
||||
for _, t := range opts.EmbeddingTokens {
|
||||
tokens = append(tokens, int(t))
|
||||
}
|
||||
return llm.llama.TokenEmbeddings(tokens, predictOptions...)
|
||||
}
|
||||
|
||||
return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
|
||||
}
|
||||
19
backend/go/llm/llama-ggml/main.go
Normal file
19
backend/go/llm/llama-ggml/main.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
accelerate
|
||||
auto-gptq==0.7.1
|
||||
grpcio==1.70.0
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,4 +1,4 @@
|
||||
bark==0.1.5
|
||||
grpcio==1.70.0
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
certifi
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.70.0
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
grpcio-tools
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.70.0
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
certifi
|
||||
packaging==24.1
|
||||
@@ -159,18 +159,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
torchType = torch.float16
|
||||
variant = "fp16"
|
||||
|
||||
options = request.Options
|
||||
|
||||
# empty dict
|
||||
self.options = {}
|
||||
|
||||
# The options are a list of strings in this form optname:optvalue
|
||||
# We are storing all the options in a dict so we can use it later when
|
||||
# generating the images
|
||||
for opt in options:
|
||||
key, value = opt.split(":")
|
||||
self.options[key] = value
|
||||
|
||||
local = False
|
||||
modelFile = request.Model
|
||||
|
||||
@@ -453,9 +441,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
|
||||
kwargs = {key: options.get(key) for key in keys if key in options}
|
||||
|
||||
# populate kwargs from self.options.
|
||||
kwargs.update(self.options)
|
||||
|
||||
# Set seed
|
||||
if request.seed > 0:
|
||||
kwargs["generator"] = torch.Generator(device=self.device).manual_seed(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
setuptools
|
||||
grpcio==1.70.0
|
||||
grpcio==1.69.0
|
||||
pillow
|
||||
protobuf
|
||||
certifi
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.70.0
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
certifi
|
||||
wheel
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.70.0
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
grpcio-tools
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.70.0
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
phonemizer
|
||||
scipy
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.70.0
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
certifi
|
||||
@@ -5,4 +5,4 @@ accelerate
|
||||
transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.4.1
|
||||
sentence-transformers==3.3.1
|
||||
@@ -6,4 +6,4 @@ accelerate
|
||||
transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.4.1
|
||||
sentence-transformers==3.3.1
|
||||
|
||||
@@ -5,4 +5,4 @@ numba==0.60.0
|
||||
transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.4.1
|
||||
sentence-transformers==3.3.1
|
||||
|
||||
@@ -7,4 +7,4 @@ numba==0.60.0
|
||||
bitsandbytes
|
||||
outetts
|
||||
bitsandbytes
|
||||
sentence-transformers==3.4.1
|
||||
sentence-transformers==3.3.1
|
||||
|
||||
@@ -8,4 +8,4 @@ numba==0.60.0
|
||||
intel-extension-for-transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.4.1
|
||||
sentence-transformers==3.3.1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.70.0
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
certifi
|
||||
setuptools
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.70.0
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
certifi
|
||||
setuptools
|
||||
@@ -62,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
}
|
||||
}
|
||||
|
||||
if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
|
||||
if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
|
||||
log.Error().Err(err).Msg("error installing models")
|
||||
}
|
||||
|
||||
@@ -145,7 +145,13 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
|
||||
if options.LoadToMemory != nil {
|
||||
for _, m := range options.LoadToMemory {
|
||||
cfg, err := application.BackendLoader().LoadBackendConfigFileByNameDefaultOptions(m, options)
|
||||
cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath,
|
||||
config.LoadOptionDebug(options.Debug),
|
||||
config.LoadOptionThreads(options.Threads),
|
||||
config.LoadOptionContextSize(options.ContextSize),
|
||||
config.LoadOptionF16(options.F16),
|
||||
config.ModelPath(options.ModelPath),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ type TokenUsage struct {
|
||||
TimingTokenGeneration float64
|
||||
}
|
||||
|
||||
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
modelFile := c.Model
|
||||
|
||||
// Check if the modelFile exists, if it doesn't try to load it from the gallery
|
||||
@@ -48,7 +48,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
||||
}
|
||||
}
|
||||
|
||||
opts := ModelOptions(*c, o)
|
||||
opts := ModelOptions(c, o)
|
||||
inferenceModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -84,7 +84,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
||||
|
||||
// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
|
||||
fn := func() (LLMResponse, error) {
|
||||
opts := gRPCPredictOpts(*c, loader.ModelPath)
|
||||
opts := gRPCPredictOpts(c, loader.ModelPath)
|
||||
opts.Prompt = s
|
||||
opts.Messages = protoMessages
|
||||
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
|
||||
|
||||
@@ -118,19 +118,9 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||
nGPULayers = *c.NGPULayers
|
||||
}
|
||||
|
||||
triggers := make([]*pb.GrammarTrigger, 0)
|
||||
for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers {
|
||||
triggers = append(triggers, &pb.GrammarTrigger{
|
||||
Word: t.Word,
|
||||
AtStart: t.AtStart,
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
return &pb.ModelOptions{
|
||||
CUDA: c.CUDA || c.Diffusers.CUDA,
|
||||
SchedulerType: c.Diffusers.SchedulerType,
|
||||
GrammarTriggers: triggers,
|
||||
PipelineType: c.Diffusers.PipelineType,
|
||||
CFGScale: c.CFGScale,
|
||||
LoraAdapter: c.LoraAdapter,
|
||||
|
||||
@@ -9,10 +9,10 @@ import (
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
rerankModel, err := loader.Load(opts...)
|
||||
func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
||||
rerankModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
)
|
||||
|
||||
func SoundGeneration(
|
||||
modelFile string,
|
||||
text string,
|
||||
duration *float32,
|
||||
temperature *float32,
|
||||
@@ -24,9 +25,8 @@ func SoundGeneration(
|
||||
backendConfig config.BackendConfig,
|
||||
) (string, *proto.Result, error) {
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
||||
soundGenModel, err := loader.Load(opts...)
|
||||
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
@@ -44,7 +44,7 @@ func SoundGeneration(
|
||||
|
||||
res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
|
||||
Text: text,
|
||||
Model: backendConfig.Model,
|
||||
Model: modelFile,
|
||||
Dst: filePath,
|
||||
Sample: doSample,
|
||||
Duration: duration,
|
||||
|
||||
@@ -4,17 +4,24 @@ import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/grpc"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
|
||||
|
||||
modelFile := backendConfig.Model
|
||||
|
||||
var inferenceModel grpc.Backend
|
||||
var err error
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
inferenceModel, err = loader.Load(opts...)
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
||||
|
||||
if backendConfig.Backend == "" {
|
||||
inferenceModel, err = loader.Load(opts...)
|
||||
} else {
|
||||
opts = append(opts, model.WithBackendString(backendConfig.Backend))
|
||||
inferenceModel, err = loader.Load(opts...)
|
||||
}
|
||||
if err != nil {
|
||||
return schema.TokenizeResponse{}, err
|
||||
}
|
||||
@@ -28,10 +35,6 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
|
||||
return schema.TokenizeResponse{}, err
|
||||
}
|
||||
|
||||
if resp.Tokens == nil {
|
||||
resp.Tokens = make([]int32, 0)
|
||||
}
|
||||
|
||||
return schema.TokenizeResponse{
|
||||
Tokens: resp.Tokens,
|
||||
}, nil
|
||||
|
||||
@@ -47,7 +47,7 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
|
||||
tks = append(tks, int(t))
|
||||
}
|
||||
tr.Segments = append(tr.Segments,
|
||||
schema.TranscriptionSegment{
|
||||
schema.Segment{
|
||||
Text: s.Text,
|
||||
Id: int(s.Id),
|
||||
Start: time.Duration(s.Start),
|
||||
|
||||
@@ -14,22 +14,28 @@ import (
|
||||
)
|
||||
|
||||
func ModelTTS(
|
||||
backend,
|
||||
text,
|
||||
modelFile,
|
||||
voice,
|
||||
language string,
|
||||
loader *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
backendConfig config.BackendConfig,
|
||||
) (string, *proto.Result, error) {
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithDefaultBackendString(model.PiperBackend))
|
||||
ttsModel, err := loader.Load(opts...)
|
||||
bb := backend
|
||||
if bb == "" {
|
||||
bb = model.PiperBackend
|
||||
}
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithBackendString(bb), model.WithModel(modelFile))
|
||||
ttsModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
if ttsModel == nil {
|
||||
return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
|
||||
return "", nil, fmt.Errorf("could not load piper model")
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
|
||||
@@ -39,21 +45,22 @@ func ModelTTS(
|
||||
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
|
||||
filePath := filepath.Join(appConfig.AudioDir, fileName)
|
||||
|
||||
// We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect.
|
||||
// This should be addressed in a follow up PR soon.
|
||||
// Copying it over nearly verbatim, as TTS backends are not functional without this.
|
||||
// If the model file is not empty, we pass it joined with the model path
|
||||
modelPath := ""
|
||||
// Checking first that it exists and is not outside ModelPath
|
||||
// TODO: we should actually first check if the modelFile is looking like
|
||||
// a FS path
|
||||
mp := filepath.Join(loader.ModelPath, backendConfig.Model)
|
||||
if _, err := os.Stat(mp); err == nil {
|
||||
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
|
||||
return "", nil, err
|
||||
if modelFile != "" {
|
||||
// If the model file is not empty, we pass it joined with the model path
|
||||
// Checking first that it exists and is not outside ModelPath
|
||||
// TODO: we should actually first check if the modelFile is looking like
|
||||
// a FS path
|
||||
mp := filepath.Join(loader.ModelPath, modelFile)
|
||||
if _, err := os.Stat(mp); err == nil {
|
||||
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
modelPath = mp
|
||||
} else {
|
||||
modelPath = modelFile
|
||||
}
|
||||
modelPath = mp
|
||||
} else {
|
||||
modelPath = backendConfig.Model // skip this step if it fails?????
|
||||
}
|
||||
|
||||
res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func VAD(request *schema.VADRequest,
|
||||
ctx context.Context,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
backendConfig config.BackendConfig) (*schema.VADResponse, error) {
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
vadModel, err := ml.Load(opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req := proto.VADRequest{
|
||||
Audio: request.Audio,
|
||||
}
|
||||
resp, err := vadModel.VAD(ctx, &req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
segments := []schema.VADSegment{}
|
||||
for _, s := range resp.Segments {
|
||||
segments = append(segments, schema.VADSegment{Start: s.Start, End: s.End})
|
||||
}
|
||||
|
||||
return &schema.VADResponse{
|
||||
Segments: segments,
|
||||
}, nil
|
||||
}
|
||||
@@ -100,7 +100,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
|
||||
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
|
||||
}
|
||||
|
||||
err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
|
||||
err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -32,6 +32,7 @@ type RunCMD struct {
|
||||
|
||||
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
|
||||
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
|
||||
RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
|
||||
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
|
||||
Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
|
||||
PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
|
||||
@@ -89,6 +90,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
|
||||
config.WithF16(r.F16),
|
||||
config.WithStringGalleries(r.Galleries),
|
||||
config.WithModelLibraryURL(r.RemoteLibrary),
|
||||
config.WithCors(r.CORS),
|
||||
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
|
||||
config.WithCsrf(r.CSRF),
|
||||
|
||||
@@ -86,14 +86,13 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
|
||||
options := config.BackendConfig{}
|
||||
options.SetDefaults()
|
||||
options.Backend = t.Backend
|
||||
options.Model = t.Model
|
||||
|
||||
var inputFile *string
|
||||
if t.InputFile != "" {
|
||||
inputFile = &t.InputFile
|
||||
}
|
||||
|
||||
filePath, _, err := backend.SoundGeneration(text,
|
||||
filePath, _, err := backend.SoundGeneration(t.Model, text,
|
||||
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
|
||||
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
|
||||
|
||||
|
||||
@@ -52,10 +52,8 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
|
||||
|
||||
options := config.BackendConfig{}
|
||||
options.SetDefaults()
|
||||
options.Backend = t.Backend
|
||||
options.Model = t.Model
|
||||
|
||||
filePath, _, err := backend.ModelTTS(text, t.Voice, t.Language, ml, opts, options)
|
||||
filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, t.Language, ml, opts, options)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -44,6 +44,8 @@ type ApplicationConfig struct {
|
||||
DisableGalleryEndpoint bool
|
||||
LoadToMemory []string
|
||||
|
||||
ModelLibraryURL string
|
||||
|
||||
Galleries []Gallery
|
||||
|
||||
BackendAssets embed.FS
|
||||
@@ -124,6 +126,12 @@ func WithP2PToken(s string) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithModelLibraryURL(url string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ModelLibraryURL = url
|
||||
}
|
||||
}
|
||||
|
||||
func WithLibPath(path string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.LibPath = path
|
||||
|
||||
@@ -287,8 +287,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
defaultTopP := 0.95
|
||||
defaultTopK := 40
|
||||
defaultTemp := 0.9
|
||||
// https://github.com/mudler/LocalAI/issues/2780
|
||||
defaultMirostat := 0
|
||||
defaultMirostat := 2
|
||||
defaultMirostatTAU := 5.0
|
||||
defaultMirostatETA := 0.1
|
||||
defaultTypicalP := 1.0
|
||||
@@ -437,21 +436,19 @@ func (c *BackendConfig) HasTemplate() bool {
|
||||
type BackendConfigUsecases int
|
||||
|
||||
const (
|
||||
FLAG_ANY BackendConfigUsecases = 0b00000000000
|
||||
FLAG_CHAT BackendConfigUsecases = 0b00000000001
|
||||
FLAG_COMPLETION BackendConfigUsecases = 0b00000000010
|
||||
FLAG_EDIT BackendConfigUsecases = 0b00000000100
|
||||
FLAG_EMBEDDINGS BackendConfigUsecases = 0b00000001000
|
||||
FLAG_RERANK BackendConfigUsecases = 0b00000010000
|
||||
FLAG_IMAGE BackendConfigUsecases = 0b00000100000
|
||||
FLAG_TRANSCRIPT BackendConfigUsecases = 0b00001000000
|
||||
FLAG_TTS BackendConfigUsecases = 0b00010000000
|
||||
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b00100000000
|
||||
FLAG_TOKENIZE BackendConfigUsecases = 0b01000000000
|
||||
FLAG_VAD BackendConfigUsecases = 0b10000000000
|
||||
FLAG_ANY BackendConfigUsecases = 0b000000000
|
||||
FLAG_CHAT BackendConfigUsecases = 0b000000001
|
||||
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
|
||||
FLAG_EDIT BackendConfigUsecases = 0b000000100
|
||||
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
|
||||
FLAG_RERANK BackendConfigUsecases = 0b000010000
|
||||
FLAG_IMAGE BackendConfigUsecases = 0b000100000
|
||||
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
|
||||
FLAG_TTS BackendConfigUsecases = 0b010000000
|
||||
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
|
||||
|
||||
// Common Subsets
|
||||
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
|
||||
FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT
|
||||
)
|
||||
|
||||
func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
|
||||
@@ -466,8 +463,6 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
|
||||
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
|
||||
"FLAG_TTS": FLAG_TTS,
|
||||
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
|
||||
"FLAG_TOKENIZE": FLAG_TOKENIZE,
|
||||
"FLAG_VAD": FLAG_VAD,
|
||||
"FLAG_LLM": FLAG_LLM,
|
||||
}
|
||||
}
|
||||
@@ -553,18 +548,5 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
|
||||
}
|
||||
}
|
||||
|
||||
if (u & FLAG_TOKENIZE) == FLAG_TOKENIZE {
|
||||
tokenizeCapableBackends := []string{"llama.cpp", "rwkv"}
|
||||
if !slices.Contains(tokenizeCapableBackends, c.Backend) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if (u & FLAG_VAD) == FLAG_VAD {
|
||||
if c.Backend != "silero-vad" {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -81,10 +81,10 @@ func readMultipleBackendConfigsFromFile(file string, opts ...ConfigLoaderOption)
|
||||
c := &[]*BackendConfig{}
|
||||
f, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("readMultipleBackendConfigsFromFile cannot read config file %q: %w", file, err)
|
||||
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||
}
|
||||
if err := yaml.Unmarshal(f, c); err != nil {
|
||||
return nil, fmt.Errorf("readMultipleBackendConfigsFromFile cannot unmarshal config file %q: %w", file, err)
|
||||
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
|
||||
}
|
||||
|
||||
for _, cc := range *c {
|
||||
@@ -101,10 +101,10 @@ func readBackendConfigFromFile(file string, opts ...ConfigLoaderOption) (*Backen
|
||||
c := &BackendConfig{}
|
||||
f, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("readBackendConfigFromFile cannot read config file %q: %w", file, err)
|
||||
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||
}
|
||||
if err := yaml.Unmarshal(f, c); err != nil {
|
||||
return nil, fmt.Errorf("readBackendConfigFromFile cannot unmarshal config file %q: %w", file, err)
|
||||
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
|
||||
}
|
||||
|
||||
c.SetDefaults(opts...)
|
||||
@@ -117,9 +117,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
|
||||
// Load a config file if present after the model name
|
||||
cfg := &BackendConfig{
|
||||
PredictionOptions: schema.PredictionOptions{
|
||||
BasicModelRequest: schema.BasicModelRequest{
|
||||
Model: modelName,
|
||||
},
|
||||
Model: modelName,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -147,15 +145,6 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func (bcl *BackendConfigLoader) LoadBackendConfigFileByNameDefaultOptions(modelName string, appConfig *ApplicationConfig) (*BackendConfig, error) {
|
||||
return bcl.LoadBackendConfigFileByName(modelName, appConfig.ModelPath,
|
||||
LoadOptionDebug(appConfig.Debug),
|
||||
LoadOptionThreads(appConfig.Threads),
|
||||
LoadOptionContextSize(appConfig.ContextSize),
|
||||
LoadOptionF16(appConfig.F16),
|
||||
ModelPath(appConfig.ModelPath))
|
||||
}
|
||||
|
||||
// This format is currently only used when reading a single file at startup, passed in via ApplicationConfig.ConfigFile
|
||||
func (bcl *BackendConfigLoader) LoadMultipleBackendConfigsSingleFile(file string, opts ...ConfigLoaderOption) error {
|
||||
bcl.Lock()
|
||||
@@ -178,7 +167,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoa
|
||||
defer bcl.Unlock()
|
||||
c, err := readBackendConfigFromFile(file, opts...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("LoadBackendConfig cannot read config file %q: %w", file, err)
|
||||
return fmt.Errorf("cannot read config file: %w", err)
|
||||
}
|
||||
|
||||
if c.Validate() {
|
||||
@@ -335,10 +324,9 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error {
|
||||
func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
|
||||
bcl.Lock()
|
||||
defer bcl.Unlock()
|
||||
|
||||
entries, err := os.ReadDir(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("LoadBackendConfigsFromPath cannot read directory '%s': %w", path, err)
|
||||
return fmt.Errorf("cannot read directory '%s': %w", path, err)
|
||||
}
|
||||
files := make([]fs.FileInfo, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
@@ -356,13 +344,13 @@ func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...
|
||||
}
|
||||
c, err := readBackendConfigFromFile(filepath.Join(path, file.Name()), opts...)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("File Name", file.Name()).Msgf("LoadBackendConfigsFromPath cannot read config file")
|
||||
log.Error().Err(err).Msgf("cannot read config file: %s", file.Name())
|
||||
continue
|
||||
}
|
||||
if c.Validate() {
|
||||
bcl.configs[c.Name] = *c
|
||||
} else {
|
||||
log.Error().Err(err).Str("Name", c.Name).Msgf("config is not valid")
|
||||
log.Error().Err(err).Msgf("config is not valid")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -48,9 +48,9 @@ parameters:
|
||||
Expect(config.Name).To(Equal("bar-baz"))
|
||||
Expect(config.Validate()).To(BeTrue())
|
||||
|
||||
// download https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml
|
||||
// download https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml
|
||||
httpClient := http.Client{}
|
||||
resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml")
|
||||
resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml")
|
||||
Expect(err).To(BeNil())
|
||||
defer resp.Body.Close()
|
||||
tmp, err = os.CreateTemp("", "config.yaml")
|
||||
|
||||
@@ -161,11 +161,10 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
|
||||
}
|
||||
|
||||
// We try to guess only if we don't have a template defined already
|
||||
guessPath := filepath.Join(modelPath, cfg.ModelFileName())
|
||||
f, err := gguf.ParseGGUFFile(guessPath)
|
||||
f, err := gguf.ParseGGUFFile(filepath.Join(modelPath, cfg.ModelFileName()))
|
||||
if err != nil {
|
||||
// Only valid for gguf files
|
||||
log.Debug().Str("filePath", guessPath).Msg("guessDefaultsFromFile: not a GGUF file")
|
||||
log.Debug().Msgf("guessDefaultsFromFile: %s", "not a GGUF file")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -48,10 +48,8 @@ var _ = Describe("Model test", func() {
|
||||
defer os.RemoveAll(tempdir)
|
||||
|
||||
gallery := []GalleryModel{{
|
||||
Metadata: Metadata{
|
||||
Name: "bert",
|
||||
URL: bertEmbeddingsURL,
|
||||
},
|
||||
Name: "bert",
|
||||
URL: bertEmbeddingsURL,
|
||||
}}
|
||||
out, err := yaml.Marshal(gallery)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
@@ -11,14 +11,6 @@ import (
|
||||
// It is used to install the model by resolving the URL and downloading the files.
|
||||
// The other fields are used to override the configuration of the model.
|
||||
type GalleryModel struct {
|
||||
Metadata `json:",inline" yaml:",inline"`
|
||||
// config_file is read in the situation where URL is blank - and therefore this is a base config.
|
||||
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
|
||||
// Overrides are used to override the configuration of the model located at URL
|
||||
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
|
||||
}
|
||||
|
||||
type Metadata struct {
|
||||
URL string `json:"url,omitempty" yaml:"url,omitempty"`
|
||||
Name string `json:"name,omitempty" yaml:"name,omitempty"`
|
||||
Description string `json:"description,omitempty" yaml:"description,omitempty"`
|
||||
@@ -26,6 +18,10 @@ type Metadata struct {
|
||||
URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"`
|
||||
Icon string `json:"icon,omitempty" yaml:"icon,omitempty"`
|
||||
Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`
|
||||
// config_file is read in the situation where URL is blank - and therefore this is a base config.
|
||||
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
|
||||
// Overrides are used to override the configuration of the model located at URL
|
||||
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
|
||||
// AdditionalFiles are used to add additional files to the model
|
||||
AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"`
|
||||
// Gallery is a reference to the gallery which contains the model
|
||||
|
||||
@@ -9,11 +9,7 @@ import (
|
||||
var _ = Describe("Gallery API tests", func() {
|
||||
Context("requests", func() {
|
||||
It("parses github with a branch", func() {
|
||||
req := GalleryModel{
|
||||
Metadata: Metadata{
|
||||
URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main",
|
||||
},
|
||||
}
|
||||
req := GalleryModel{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"}
|
||||
e, err := GetGalleryConfigFromURL(req.URL, "")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(e.Name).To(Equal("gpt4all-j"))
|
||||
|
||||
@@ -130,6 +130,7 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
return metricsService.Shutdown()
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
// Health Checks should always be exempt from auth, so register these first
|
||||
routes.HealthRoutes(router)
|
||||
@@ -166,15 +167,13 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
galleryService := services.NewGalleryService(application.ApplicationConfig())
|
||||
galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
|
||||
|
||||
requestExtractor := middleware.NewRequestExtractor(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
|
||||
routes.RegisterElevenLabsRoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
routes.RegisterLocalAIRoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
|
||||
routes.RegisterOpenAIRoutes(router, requestExtractor, application)
|
||||
routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
|
||||
routes.RegisterOpenAIRoutes(router, application)
|
||||
if !application.ApplicationConfig().DisableWebUI {
|
||||
routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
|
||||
}
|
||||
routes.RegisterJINARoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
|
||||
httpFS := http.FS(embedDirStatic)
|
||||
|
||||
|
||||
@@ -299,18 +299,14 @@ var _ = Describe("API test", func() {
|
||||
|
||||
g := []gallery.GalleryModel{
|
||||
{
|
||||
Metadata: gallery.Metadata{
|
||||
Name: "bert",
|
||||
URL: bertEmbeddingsURL,
|
||||
},
|
||||
Name: "bert",
|
||||
URL: bertEmbeddingsURL,
|
||||
},
|
||||
{
|
||||
Metadata: gallery.Metadata{
|
||||
Name: "bert2",
|
||||
URL: bertEmbeddingsURL,
|
||||
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
|
||||
},
|
||||
Overrides: map[string]interface{}{"foo": "bar"},
|
||||
Name: "bert2",
|
||||
URL: bertEmbeddingsURL,
|
||||
Overrides: map[string]interface{}{"foo": "bar"},
|
||||
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
|
||||
},
|
||||
}
|
||||
out, err := yaml.Marshal(g)
|
||||
@@ -480,7 +476,7 @@ var _ = Describe("API test", func() {
|
||||
})
|
||||
It("apply models from config", func() {
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml",
|
||||
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
|
||||
})
|
||||
|
||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||
@@ -526,6 +522,77 @@ var _ = Describe("API test", func() {
|
||||
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
|
||||
})
|
||||
|
||||
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("test supported only on linux")
|
||||
}
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
URL: "github:go-skynet/model-gallery/openllama_3b.yaml",
|
||||
Name: "openllama_3b",
|
||||
Overrides: map[string]interface{}{"backend": "llama-ggml", "mmap": true, "f16": true, "context_size": 128},
|
||||
})
|
||||
|
||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||
|
||||
uuid := response["uuid"].(string)
|
||||
|
||||
Eventually(func() bool {
|
||||
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
|
||||
return response["processed"].(bool)
|
||||
}, "360s", "10s").Should(Equal(true))
|
||||
|
||||
By("testing completion")
|
||||
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "openllama_3b", Prompt: "Count up to five: one, two, three, four, "})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Choices)).To(Equal(1))
|
||||
Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
|
||||
|
||||
By("testing functions")
|
||||
resp2, err := client.CreateChatCompletion(
|
||||
context.TODO(),
|
||||
openai.ChatCompletionRequest{
|
||||
Model: "openllama_3b",
|
||||
Messages: []openai.ChatCompletionMessage{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "What is the weather like in San Francisco (celsius)?",
|
||||
},
|
||||
},
|
||||
Functions: []openai.FunctionDefinition{
|
||||
openai.FunctionDefinition{
|
||||
Name: "get_current_weather",
|
||||
Description: "Get the current weather",
|
||||
Parameters: jsonschema.Definition{
|
||||
Type: jsonschema.Object,
|
||||
Properties: map[string]jsonschema.Definition{
|
||||
"location": {
|
||||
Type: jsonschema.String,
|
||||
Description: "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"unit": {
|
||||
Type: jsonschema.String,
|
||||
Enum: []string{"celcius", "fahrenheit"},
|
||||
},
|
||||
},
|
||||
Required: []string{"location"},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp2.Choices)).To(Equal(1))
|
||||
Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
|
||||
Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
|
||||
|
||||
var res map[string]string
|
||||
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
|
||||
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
|
||||
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
|
||||
|
||||
})
|
||||
|
||||
It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("test supported only on linux")
|
||||
@@ -533,7 +600,7 @@ var _ = Describe("API test", func() {
|
||||
|
||||
modelName := "hermes-2-pro-mistral"
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml",
|
||||
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
|
||||
})
|
||||
|
||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||
|
||||
47
core/http/ctx/fiber.go
Normal file
47
core/http/ctx/fiber.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package fiberContext
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// ModelFromContext returns the model from the context
|
||||
// If no model is specified, it will take the first available
|
||||
// Takes a model string as input which should be the one received from the user request.
|
||||
// It returns the model name resolved from the context and an error if any.
|
||||
func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
|
||||
if ctx.Params("model") != "" {
|
||||
modelInput = ctx.Params("model")
|
||||
}
|
||||
if ctx.Query("model") != "" {
|
||||
modelInput = ctx.Query("model")
|
||||
}
|
||||
// Set model from bearer token, if available
|
||||
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // Reduced duplicate characters of Bearer
|
||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||
|
||||
// If no model was specified, take the first available
|
||||
if modelInput == "" && !bearerExists && firstModel {
|
||||
models, _ := services.ListModels(cl, loader, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||
if len(models) > 0 {
|
||||
modelInput = models[0]
|
||||
log.Debug().Msgf("No model specified, using: %s", modelInput)
|
||||
} else {
|
||||
log.Debug().Msgf("No model specified, returning error")
|
||||
return "", fmt.Errorf("no model specified")
|
||||
}
|
||||
}
|
||||
|
||||
// If a model is found in bearer token takes precedence
|
||||
if bearerExists {
|
||||
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
||||
modelInput = bearer
|
||||
}
|
||||
return modelInput, nil
|
||||
}
|
||||
@@ -4,7 +4,7 @@ import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
@@ -17,21 +17,45 @@ import (
|
||||
// @Router /v1/sound-generation [post]
|
||||
func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.ElevenLabsSoundGenerationRequest)
|
||||
if !ok || input.ModelID == "" {
|
||||
return fiber.ErrBadRequest
|
||||
input := new(schema.ElevenLabsSoundGenerationRequest)
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
|
||||
if err != nil {
|
||||
modelFile = input.ModelID
|
||||
log.Warn().Str("ModelID", input.ModelID).Msg("Model not found in context")
|
||||
}
|
||||
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
if err != nil {
|
||||
modelFile = input.ModelID
|
||||
log.Warn().Str("Request ModelID", input.ModelID).Err(err).Msg("error during LoadBackendConfigFileByName, using request ModelID")
|
||||
} else {
|
||||
if input.ModelID != "" {
|
||||
modelFile = input.ModelID
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
}
|
||||
log.Debug().Str("modelFile", "modelFile").Str("backend", cfg.Backend).Msg("Sound Generation Request about to be sent to backend")
|
||||
|
||||
if input.Duration != nil {
|
||||
log.Debug().Float32("duration", *input.Duration).Msg("duration set")
|
||||
}
|
||||
if input.Temperature != nil {
|
||||
log.Debug().Float32("temperature", *input.Temperature).Msg("temperature set")
|
||||
}
|
||||
|
||||
// TODO: Support uploading files?
|
||||
filePath, _, err := backend.SoundGeneration(input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
|
||||
filePath, _, err := backend.SoundGeneration(modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ package elevenlabs
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
@@ -20,21 +20,39 @@ import (
|
||||
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
input := new(schema.ElevenLabsTTSRequest)
|
||||
voiceID := c.Params("voice-id")
|
||||
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.ElevenLabsTTSRequest)
|
||||
if !ok || input.ModelID == "" {
|
||||
return fiber.ErrBadRequest
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
|
||||
if err != nil {
|
||||
modelFile = input.ModelID
|
||||
log.Warn().Msgf("Model not found in context: %s", input.ModelID)
|
||||
}
|
||||
|
||||
log.Debug().Str("modelName", input.ModelID).Msg("elevenlabs TTS request recieved")
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
if err != nil {
|
||||
modelFile = input.ModelID
|
||||
log.Warn().Msgf("Model not found in context: %s", input.ModelID)
|
||||
} else {
|
||||
if input.ModelID != "" {
|
||||
modelFile = input.ModelID
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
}
|
||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||
|
||||
filePath, _, err := backend.ModelTTS(input.Text, voiceID, input.LanguageCode, ml, appConfig, *cfg)
|
||||
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, "", voiceID, ml, appConfig, *cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -3,9 +3,9 @@ package jina
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
@@ -19,32 +19,58 @@ import (
|
||||
// @Router /v1/rerank [post]
|
||||
func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.JINARerankRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
req := new(schema.JINARerankRequest)
|
||||
if err := c.BodyParser(req); err != nil {
|
||||
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
|
||||
"error": "Cannot parse JSON",
|
||||
})
|
||||
}
|
||||
|
||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
input := new(schema.TTSRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debug().Str("model", input.Model).Msg("JINA Rerank Request recieved")
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
}
|
||||
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||
|
||||
if input.Backend != "" {
|
||||
cfg.Backend = input.Backend
|
||||
}
|
||||
|
||||
request := &proto.RerankRequest{
|
||||
Query: input.Query,
|
||||
TopN: int32(input.TopN),
|
||||
Documents: input.Documents,
|
||||
Query: req.Query,
|
||||
TopN: int32(req.TopN),
|
||||
Documents: req.Documents,
|
||||
}
|
||||
|
||||
results, err := backend.Rerank(request, ml, appConfig, *cfg)
|
||||
results, err := backend.Rerank(modelFile, request, ml, appConfig, *cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
response := &schema.JINARerankResponse{
|
||||
Model: input.Model,
|
||||
Model: req.Model,
|
||||
}
|
||||
|
||||
for _, r := range results.Results {
|
||||
|
||||
@@ -117,25 +117,19 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
|
||||
// @Router /models/available [get]
|
||||
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
|
||||
|
||||
models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Available %d models from %d galleries\n", len(models), len(mgs.galleries))
|
||||
|
||||
m := []gallery.Metadata{}
|
||||
|
||||
for _, mm := range models {
|
||||
m = append(m, mm.Metadata)
|
||||
log.Debug().Msgf("Models found from galleries: %+v", models)
|
||||
for _, m := range models {
|
||||
log.Debug().Msgf("Model found from galleries: %+v", m)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Models %#v", m)
|
||||
|
||||
dat, err := json.Marshal(m)
|
||||
dat, err := json.Marshal(models)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not marshal models: %w", err)
|
||||
return err
|
||||
}
|
||||
return c.Send(dat)
|
||||
}
|
||||
|
||||
@@ -4,15 +4,13 @@ import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
// TODO: This is not yet in use. Needs middleware rework, since it is not referenced.
|
||||
|
||||
// TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID
|
||||
//
|
||||
// @Summary Get TokenMetrics for Active Slot.
|
||||
@@ -31,13 +29,18 @@ func TokenMetricsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader,
|
||||
return err
|
||||
}
|
||||
|
||||
modelFile, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
||||
if !ok || modelFile != "" {
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
}
|
||||
|
||||
cfg, err := cl.LoadBackendConfigFileByNameDefaultOptions(modelFile, appConfig)
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
log.Err(err)
|
||||
|
||||
@@ -4,32 +4,55 @@ import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// TokenizeEndpoint exposes a REST API to tokenize the content
|
||||
// @Summary Tokenize the input.
|
||||
// @Param request body schema.TokenizeRequest true "Request"
|
||||
// @Success 200 {object} schema.TokenizeResponse "Response"
|
||||
// @Router /v1/tokenize [post]
|
||||
func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(ctx *fiber.Ctx) error {
|
||||
input, ok := ctx.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.TokenizeRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
input := new(schema.TokenizeRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cfg, ok := ctx.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
}
|
||||
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
log.Err(err)
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||
|
||||
tokenResponse, err := backend.ModelTokenize(input.Content, ml, *cfg, appConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return ctx.JSON(tokenResponse)
|
||||
|
||||
c.JSON(tokenResponse)
|
||||
return nil
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ package localai
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
@@ -24,24 +24,37 @@ import (
|
||||
// @Router /tts [post]
|
||||
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.TTSRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
input := new(schema.TTSRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
}
|
||||
|
||||
log.Debug().Str("model", input.Model).Msg("LocalAI TTS Request recieved")
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
|
||||
if cfg.Backend == "" {
|
||||
if input.Backend != "" {
|
||||
cfg.Backend = input.Backend
|
||||
} else {
|
||||
cfg.Backend = model.PiperBackend
|
||||
}
|
||||
if err != nil {
|
||||
log.Err(err)
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||
|
||||
if input.Backend != "" {
|
||||
cfg.Backend = input.Backend
|
||||
}
|
||||
|
||||
if input.Language != "" {
|
||||
@@ -52,7 +65,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
|
||||
cfg.Voice = input.Voice
|
||||
}
|
||||
|
||||
filePath, _, err := backend.ModelTTS(input.Input, cfg.Voice, cfg.Language, ml, appConfig, *cfg)
|
||||
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, cfg.Voice, cfg.Language, ml, appConfig, *cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -4,8 +4,9 @@ import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
@@ -18,20 +19,45 @@ import (
|
||||
// @Router /vad [post]
|
||||
func VADEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.VADRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
input := new(schema.VADRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
}
|
||||
|
||||
log.Debug().Str("model", input.Model).Msg("LocalAI VAD Request recieved")
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
|
||||
resp, err := backend.VAD(input, c.Context(), ml, appConfig, *cfg)
|
||||
if err != nil {
|
||||
log.Err(err)
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||
|
||||
opts := backend.ModelOptions(*cfg, appConfig, model.WithBackendString(cfg.Backend), model.WithModel(modelFile))
|
||||
|
||||
vadModel, err := ml.Load(opts...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
req := proto.VADRequest{
|
||||
Audio: input.Audio,
|
||||
}
|
||||
resp, err := vadModel.VAD(c.Context(), &req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -5,19 +5,18 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/templates"
|
||||
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/valyala/fasthttp"
|
||||
)
|
||||
@@ -175,20 +174,26 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
textContentToReturn = ""
|
||||
id = uuid.New().String()
|
||||
created = int(time.Now().Unix())
|
||||
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
// Set CorrelationID
|
||||
correlationID := c.Get("X-Correlation-ID")
|
||||
if len(strings.TrimSpace(correlationID)) == 0 {
|
||||
correlationID = id
|
||||
}
|
||||
c.Set("X-Correlation-ID", correlationID)
|
||||
|
||||
// Opt-in extra usage flag
|
||||
extraUsage := c.Get("Extra-Usage", "") != ""
|
||||
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
return fiber.ErrBadRequest
|
||||
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Chat endpoint configuration read: %+v", config)
|
||||
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, startupOptions.Debug, startupOptions.Threads, startupOptions.ContextSize, startupOptions.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
log.Debug().Msgf("Configuration read: %+v", config)
|
||||
|
||||
funcs := input.Functions
|
||||
shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions()
|
||||
@@ -396,11 +401,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
log.Debug().Msgf("Text content to return: %s", textContentToReturn)
|
||||
noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0
|
||||
|
||||
finishReason := "stop"
|
||||
if len(input.Tools) > 0 {
|
||||
finishReason = "tool_calls"
|
||||
}
|
||||
|
||||
switch {
|
||||
case noActionsToRun:
|
||||
result, err := handleQuestion(config, input, ml, startupOptions, results, s, predInput)
|
||||
@@ -408,18 +408,19 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
log.Error().Err(err).Msg("error handling question")
|
||||
return
|
||||
}
|
||||
|
||||
*c = append(*c, schema.Choice{
|
||||
FinishReason: finishReason,
|
||||
Message: &schema.Message{Role: "assistant", Content: &result}})
|
||||
Message: &schema.Message{Role: "assistant", Content: &result}})
|
||||
default:
|
||||
toolChoice := schema.Choice{
|
||||
FinishReason: finishReason,
|
||||
Message: &schema.Message{
|
||||
Role: "assistant",
|
||||
},
|
||||
}
|
||||
|
||||
if len(input.Tools) > 0 {
|
||||
toolChoice.FinishReason = "tool_calls"
|
||||
}
|
||||
|
||||
for _, ss := range results {
|
||||
name, args := ss.Name, ss.Arguments
|
||||
if len(input.Tools) > 0 {
|
||||
@@ -437,7 +438,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
},
|
||||
)
|
||||
} else {
|
||||
// otherwise we return more choices directly (deprecated)
|
||||
// otherwise we return more choices directly
|
||||
*c = append(*c, schema.Choice{
|
||||
FinishReason: "function_call",
|
||||
Message: &schema.Message{
|
||||
@@ -538,7 +539,7 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
|
||||
audios = append(audios, m.StringAudios...)
|
||||
}
|
||||
|
||||
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, config, o, nil)
|
||||
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, *config, o, nil)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("model inference failed")
|
||||
return "", err
|
||||
|
||||
@@ -10,13 +10,12 @@ import (
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/templates"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/valyala/fasthttp"
|
||||
@@ -28,9 +27,10 @@ import (
|
||||
// @Success 200 {object} schema.OpenAIResponse "Response"
|
||||
// @Router /v1/completions [post]
|
||||
func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
|
||||
process := func(id string, s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
|
||||
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
|
||||
ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
|
||||
usage := schema.OpenAIUsage{
|
||||
PromptTokens: tokenUsage.Prompt,
|
||||
@@ -63,18 +63,22 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
// Handle Correlation
|
||||
id := c.Get("X-Correlation-ID", uuid.New().String())
|
||||
// Add Correlation
|
||||
c.Set("X-Correlation-ID", id)
|
||||
|
||||
// Opt-in extra usage flag
|
||||
extraUsage := c.Get("Extra-Usage", "") != ""
|
||||
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
return fiber.ErrBadRequest
|
||||
log.Debug().Msgf("`input`: %+v", input)
|
||||
|
||||
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
if config.ResponseFormatMap != nil {
|
||||
@@ -118,7 +122,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
|
||||
|
||||
responses := make(chan schema.OpenAIResponse)
|
||||
|
||||
go process(id, predInput, input, config, ml, responses, extraUsage)
|
||||
go process(predInput, input, config, ml, responses, extraUsage)
|
||||
|
||||
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
||||
|
||||
|
||||
@@ -2,17 +2,16 @@ package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/templates"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
@@ -26,21 +25,20 @@ import (
|
||||
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
// Opt-in extra usage flag
|
||||
extraUsage := c.Get("Extra-Usage", "") != ""
|
||||
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
return fiber.ErrBadRequest
|
||||
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Edit Endpoint Input : %+v", input)
|
||||
log.Debug().Msgf("Edit Endpoint Config: %+v", *config)
|
||||
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
var result []schema.Choice
|
||||
totalTokenUsage := backend.TokenUsage{}
|
||||
|
||||
@@ -2,11 +2,11 @@ package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
|
||||
"github.com/google/uuid"
|
||||
@@ -23,14 +23,14 @@ import (
|
||||
// @Router /v1/embeddings [post]
|
||||
func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
model, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
return fiber.ErrBadRequest
|
||||
config, input, err := mergeRequestWithConfig(model, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
@@ -15,7 +15,6 @@ import (
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
@@ -67,23 +66,25 @@ func downloadFile(url string) (string, error) {
|
||||
// @Router /v1/images/generations [post]
|
||||
func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
log.Error().Msg("Image Endpoint - Invalid Input")
|
||||
return fiber.ErrBadRequest
|
||||
m, input, err := readRequest(c, cl, ml, appConfig, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
log.Error().Msg("Image Endpoint - Invalid Config")
|
||||
return fiber.ErrBadRequest
|
||||
|
||||
if m == "" {
|
||||
m = "stablediffusion"
|
||||
}
|
||||
log.Debug().Msgf("Loading model: %+v", m)
|
||||
|
||||
config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, 0, 0, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
src := ""
|
||||
if input.File != "" {
|
||||
|
||||
fileData := []byte{}
|
||||
var err error
|
||||
// check if input.File is an URL, if so download it and save it
|
||||
// to a temporary file
|
||||
if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") {
|
||||
|
||||
@@ -37,7 +37,7 @@ func ComputeChoices(
|
||||
}
|
||||
|
||||
// get the model function to call for the result
|
||||
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, config, o, tokenCallback)
|
||||
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, *config, o, tokenCallback)
|
||||
if err != nil {
|
||||
return result, backend.TokenUsage{}, err
|
||||
}
|
||||
|
||||
@@ -1,450 +1,326 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/templates"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type correlationIDKeyType string
|
||||
|
||||
// CorrelationIDKey to track request across process boundary
|
||||
const CorrelationIDKey correlationIDKeyType = "correlationID"
|
||||
|
||||
type RequestExtractor struct {
|
||||
backendConfigLoader *config.BackendConfigLoader
|
||||
modelLoader *model.ModelLoader
|
||||
applicationConfig *config.ApplicationConfig
|
||||
}
|
||||
|
||||
func NewRequestExtractor(backendConfigLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, applicationConfig *config.ApplicationConfig) *RequestExtractor {
|
||||
return &RequestExtractor{
|
||||
backendConfigLoader: backendConfigLoader,
|
||||
modelLoader: modelLoader,
|
||||
applicationConfig: applicationConfig,
|
||||
}
|
||||
}
|
||||
|
||||
const CONTEXT_LOCALS_KEY_MODEL_NAME = "MODEL_NAME"
|
||||
const CONTEXT_LOCALS_KEY_LOCALAI_REQUEST = "LOCALAI_REQUEST"
|
||||
const CONTEXT_LOCALS_KEY_MODEL_CONFIG = "MODEL_CONFIG"
|
||||
|
||||
// TODO: Refactor to not return error if unchanged
|
||||
func (re *RequestExtractor) setModelNameFromRequest(ctx *fiber.Ctx) {
|
||||
model, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
||||
if ok && model != "" {
|
||||
return
|
||||
}
|
||||
model = ctx.Params("model")
|
||||
|
||||
if (model == "") && ctx.Query("model") != "" {
|
||||
model = ctx.Query("model")
|
||||
}
|
||||
|
||||
if model == "" {
|
||||
// Set model from bearer token, if available
|
||||
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // "Bearer " => "Bear" to please go-staticcheck. It looks dumb but we might as well take free performance on something called for nearly every request.
|
||||
if bearer != "" {
|
||||
exists, err := services.CheckIfModelExists(re.backendConfigLoader, re.modelLoader, bearer, services.ALWAYS_INCLUDE)
|
||||
if err == nil && exists {
|
||||
model = bearer
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, model)
|
||||
}
|
||||
|
||||
func (re *RequestExtractor) BuildConstantDefaultModelNameMiddleware(defaultModelName string) fiber.Handler {
|
||||
return func(ctx *fiber.Ctx) error {
|
||||
re.setModelNameFromRequest(ctx)
|
||||
localModelName, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
||||
if !ok || localModelName == "" {
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, defaultModelName)
|
||||
log.Debug().Str("defaultModelName", defaultModelName).Msg("context local model name not found, setting to default")
|
||||
}
|
||||
return ctx.Next()
|
||||
}
|
||||
}
|
||||
|
||||
func (re *RequestExtractor) BuildFilteredFirstAvailableDefaultModel(filterFn config.BackendConfigFilterFn) fiber.Handler {
|
||||
return func(ctx *fiber.Ctx) error {
|
||||
re.setModelNameFromRequest(ctx)
|
||||
localModelName := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
||||
if localModelName != "" { // Don't overwrite existing values
|
||||
return ctx.Next()
|
||||
}
|
||||
|
||||
modelNames, err := services.ListModels(re.backendConfigLoader, re.modelLoader, filterFn, services.SKIP_IF_CONFIGURED)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("non-fatal error calling ListModels during SetDefaultModelNameToFirstAvailable()")
|
||||
return ctx.Next()
|
||||
}
|
||||
|
||||
if len(modelNames) == 0 {
|
||||
log.Warn().Msg("SetDefaultModelNameToFirstAvailable used with no matching models installed")
|
||||
// This is non-fatal - making it so was breaking the case of direct installation of raw models
|
||||
// return errors.New("this endpoint requires at least one model to be installed")
|
||||
return ctx.Next()
|
||||
}
|
||||
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, modelNames[0])
|
||||
log.Debug().Str("first model name", modelNames[0]).Msg("context local model name not found, setting to the first model")
|
||||
return ctx.Next()
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: If context and cancel above belong on all methods, move that part of above into here!
|
||||
// Otherwise, it's in its own method below for now
|
||||
func (re *RequestExtractor) SetModelAndConfig(initializer func() schema.LocalAIRequest) fiber.Handler {
|
||||
return func(ctx *fiber.Ctx) error {
|
||||
input := initializer()
|
||||
if input == nil {
|
||||
return fmt.Errorf("unable to initialize body")
|
||||
}
|
||||
if err := ctx.BodyParser(input); err != nil {
|
||||
return fmt.Errorf("failed parsing request body: %w", err)
|
||||
}
|
||||
|
||||
// If this request doesn't have an associated model name, fetch it from earlier in the middleware chain
|
||||
if input.ModelName(nil) == "" {
|
||||
localModelName, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
||||
if ok && localModelName != "" {
|
||||
log.Debug().Str("context localModelName", localModelName).Msg("overriding empty model name in request body with value found earlier in middleware chain")
|
||||
input.ModelName(&localModelName)
|
||||
}
|
||||
}
|
||||
|
||||
cfg, err := re.backendConfigLoader.LoadBackendConfigFileByNameDefaultOptions(input.ModelName(nil), re.applicationConfig)
|
||||
|
||||
if err != nil {
|
||||
log.Err(err)
|
||||
log.Warn().Msgf("Model Configuration File not found for %q", input.ModelName(nil))
|
||||
} else if cfg.Model == "" && input.ModelName(nil) != "" {
|
||||
log.Debug().Str("input.ModelName", input.ModelName(nil)).Msg("config does not include model, using input")
|
||||
cfg.Model = input.ModelName(nil)
|
||||
}
|
||||
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST, input)
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_CONFIG, cfg)
|
||||
|
||||
return ctx.Next()
|
||||
}
|
||||
}
|
||||
|
||||
func (re *RequestExtractor) SetOpenAIRequest(ctx *fiber.Ctx) error {
|
||||
input, ok := ctx.Locals(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
cfg, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
// Extract or generate the correlation ID
|
||||
correlationID := ctx.Get("X-Correlation-ID", uuid.New().String())
|
||||
ctx.Set("X-Correlation-ID", correlationID)
|
||||
|
||||
c1, cancel := context.WithCancel(re.applicationConfig.Context)
|
||||
// Add the correlation ID to the new context
|
||||
ctxWithCorrelationID := context.WithValue(c1, CorrelationIDKey, correlationID)
|
||||
|
||||
input.Context = ctxWithCorrelationID
|
||||
input.Cancel = cancel
|
||||
|
||||
err := mergeOpenAIRequestAndBackendConfig(cfg, input)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if cfg.Model == "" {
|
||||
log.Debug().Str("input.Model", input.Model).Msg("replacing empty cfg.Model with input value")
|
||||
cfg.Model = input.Model
|
||||
}
|
||||
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST, input)
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_CONFIG, cfg)
|
||||
|
||||
return ctx.Next()
|
||||
}
|
||||
|
||||
func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *schema.OpenAIRequest) error {
|
||||
if input.Echo {
|
||||
config.Echo = input.Echo
|
||||
}
|
||||
if input.TopK != nil {
|
||||
config.TopK = input.TopK
|
||||
}
|
||||
if input.TopP != nil {
|
||||
config.TopP = input.TopP
|
||||
}
|
||||
|
||||
if input.Backend != "" {
|
||||
config.Backend = input.Backend
|
||||
}
|
||||
|
||||
if input.ClipSkip != 0 {
|
||||
config.Diffusers.ClipSkip = input.ClipSkip
|
||||
}
|
||||
|
||||
if input.ModelBaseName != "" {
|
||||
config.AutoGPTQ.ModelBaseName = input.ModelBaseName
|
||||
}
|
||||
|
||||
if input.NegativePromptScale != 0 {
|
||||
config.NegativePromptScale = input.NegativePromptScale
|
||||
}
|
||||
|
||||
if input.UseFastTokenizer {
|
||||
config.UseFastTokenizer = input.UseFastTokenizer
|
||||
}
|
||||
|
||||
if input.NegativePrompt != "" {
|
||||
config.NegativePrompt = input.NegativePrompt
|
||||
}
|
||||
|
||||
if input.RopeFreqBase != 0 {
|
||||
config.RopeFreqBase = input.RopeFreqBase
|
||||
}
|
||||
|
||||
if input.RopeFreqScale != 0 {
|
||||
config.RopeFreqScale = input.RopeFreqScale
|
||||
}
|
||||
|
||||
if input.Grammar != "" {
|
||||
config.Grammar = input.Grammar
|
||||
}
|
||||
|
||||
if input.Temperature != nil {
|
||||
config.Temperature = input.Temperature
|
||||
}
|
||||
|
||||
if input.Maxtokens != nil {
|
||||
config.Maxtokens = input.Maxtokens
|
||||
}
|
||||
|
||||
if input.ResponseFormat != nil {
|
||||
switch responseFormat := input.ResponseFormat.(type) {
|
||||
case string:
|
||||
config.ResponseFormat = responseFormat
|
||||
case map[string]interface{}:
|
||||
config.ResponseFormatMap = responseFormat
|
||||
}
|
||||
}
|
||||
|
||||
switch stop := input.Stop.(type) {
|
||||
case string:
|
||||
if stop != "" {
|
||||
config.StopWords = append(config.StopWords, stop)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range stop {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.StopWords = append(config.StopWords, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(input.Tools) > 0 {
|
||||
for _, tool := range input.Tools {
|
||||
input.Functions = append(input.Functions, tool.Function)
|
||||
}
|
||||
}
|
||||
|
||||
if input.ToolsChoice != nil {
|
||||
var toolChoice functions.Tool
|
||||
|
||||
switch content := input.ToolsChoice.(type) {
|
||||
case string:
|
||||
_ = json.Unmarshal([]byte(content), &toolChoice)
|
||||
case map[string]interface{}:
|
||||
dat, _ := json.Marshal(content)
|
||||
_ = json.Unmarshal(dat, &toolChoice)
|
||||
}
|
||||
input.FunctionCall = map[string]interface{}{
|
||||
"name": toolChoice.Function.Name,
|
||||
}
|
||||
}
|
||||
|
||||
// Decode each request's message content
|
||||
imgIndex, vidIndex, audioIndex := 0, 0, 0
|
||||
for i, m := range input.Messages {
|
||||
nrOfImgsInMessage := 0
|
||||
nrOfVideosInMessage := 0
|
||||
nrOfAudiosInMessage := 0
|
||||
|
||||
switch content := m.Content.(type) {
|
||||
case string:
|
||||
input.Messages[i].StringContent = content
|
||||
case []interface{}:
|
||||
dat, _ := json.Marshal(content)
|
||||
c := []schema.Content{}
|
||||
json.Unmarshal(dat, &c)
|
||||
|
||||
textContent := ""
|
||||
// we will template this at the end
|
||||
|
||||
CONTENT:
|
||||
for _, pp := range c {
|
||||
switch pp.Type {
|
||||
case "text":
|
||||
textContent += pp.Text
|
||||
//input.Messages[i].StringContent = pp.Text
|
||||
case "video", "video_url":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed encoding video: %s", err)
|
||||
continue CONTENT
|
||||
}
|
||||
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
|
||||
vidIndex++
|
||||
nrOfVideosInMessage++
|
||||
case "audio_url", "audio":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed encoding image: %s", err)
|
||||
continue CONTENT
|
||||
}
|
||||
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
|
||||
audioIndex++
|
||||
nrOfAudiosInMessage++
|
||||
case "image_url", "image":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed encoding image: %s", err)
|
||||
continue CONTENT
|
||||
}
|
||||
|
||||
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
||||
|
||||
imgIndex++
|
||||
nrOfImgsInMessage++
|
||||
}
|
||||
}
|
||||
|
||||
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(config.TemplateConfig.Multimodal, templates.MultiModalOptions{
|
||||
TotalImages: imgIndex,
|
||||
TotalVideos: vidIndex,
|
||||
TotalAudios: audioIndex,
|
||||
ImagesInMessage: nrOfImgsInMessage,
|
||||
VideosInMessage: nrOfVideosInMessage,
|
||||
AudiosInMessage: nrOfAudiosInMessage,
|
||||
}, textContent)
|
||||
}
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
config.RepeatPenalty = input.RepeatPenalty
|
||||
}
|
||||
|
||||
if input.FrequencyPenalty != 0 {
|
||||
config.FrequencyPenalty = input.FrequencyPenalty
|
||||
}
|
||||
|
||||
if input.PresencePenalty != 0 {
|
||||
config.PresencePenalty = input.PresencePenalty
|
||||
}
|
||||
|
||||
if input.Keep != 0 {
|
||||
config.Keep = input.Keep
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
config.Batch = input.Batch
|
||||
}
|
||||
|
||||
if input.IgnoreEOS {
|
||||
config.IgnoreEOS = input.IgnoreEOS
|
||||
}
|
||||
|
||||
if input.Seed != nil {
|
||||
config.Seed = input.Seed
|
||||
}
|
||||
|
||||
if input.TypicalP != nil {
|
||||
config.TypicalP = input.TypicalP
|
||||
}
|
||||
|
||||
log.Debug().Str("input.Input", fmt.Sprintf("%+v", input.Input))
|
||||
|
||||
switch inputs := input.Input.(type) {
|
||||
case string:
|
||||
if inputs != "" {
|
||||
config.InputStrings = append(config.InputStrings, inputs)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range inputs {
|
||||
switch i := pp.(type) {
|
||||
case string:
|
||||
config.InputStrings = append(config.InputStrings, i)
|
||||
case []interface{}:
|
||||
tokens := []int{}
|
||||
for _, ii := range i {
|
||||
tokens = append(tokens, int(ii.(float64)))
|
||||
}
|
||||
config.InputToken = append(config.InputToken, tokens)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Can be either a string or an object
|
||||
switch fnc := input.FunctionCall.(type) {
|
||||
case string:
|
||||
if fnc != "" {
|
||||
config.SetFunctionCallString(fnc)
|
||||
}
|
||||
case map[string]interface{}:
|
||||
var name string
|
||||
n, exists := fnc["name"]
|
||||
if exists {
|
||||
nn, e := n.(string)
|
||||
if e {
|
||||
name = nn
|
||||
}
|
||||
}
|
||||
config.SetFunctionCallNameString(name)
|
||||
}
|
||||
|
||||
switch p := input.Prompt.(type) {
|
||||
case string:
|
||||
config.PromptStrings = append(config.PromptStrings, p)
|
||||
case []interface{}:
|
||||
for _, pp := range p {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.PromptStrings = append(config.PromptStrings, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If a quality was defined as number, convert it to step
|
||||
if input.Quality != "" {
|
||||
q, err := strconv.Atoi(input.Quality)
|
||||
if err == nil {
|
||||
config.Step = q
|
||||
}
|
||||
}
|
||||
|
||||
if config.Validate() {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("unable to validate configuration after merging")
|
||||
}
|
||||
package openai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/templates"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type correlationIDKeyType string
|
||||
|
||||
// CorrelationIDKey to track request across process boundary
|
||||
const CorrelationIDKey correlationIDKeyType = "correlationID"
|
||||
|
||||
func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
|
||||
input := new(schema.OpenAIRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return "", nil, fmt.Errorf("failed parsing request body: %w", err)
|
||||
}
|
||||
|
||||
received, _ := json.Marshal(input)
|
||||
// Extract or generate the correlation ID
|
||||
correlationID := c.Get("X-Correlation-ID", uuid.New().String())
|
||||
|
||||
ctx, cancel := context.WithCancel(o.Context)
|
||||
// Add the correlation ID to the new context
|
||||
ctxWithCorrelationID := context.WithValue(ctx, CorrelationIDKey, correlationID)
|
||||
|
||||
input.Context = ctxWithCorrelationID
|
||||
input.Cancel = cancel
|
||||
|
||||
log.Debug().Msgf("Request received: %s", string(received))
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, firstModel)
|
||||
|
||||
return modelFile, input, err
|
||||
}
|
||||
|
||||
func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) {
|
||||
if input.Echo {
|
||||
config.Echo = input.Echo
|
||||
}
|
||||
if input.TopK != nil {
|
||||
config.TopK = input.TopK
|
||||
}
|
||||
if input.TopP != nil {
|
||||
config.TopP = input.TopP
|
||||
}
|
||||
|
||||
if input.Backend != "" {
|
||||
config.Backend = input.Backend
|
||||
}
|
||||
|
||||
if input.ClipSkip != 0 {
|
||||
config.Diffusers.ClipSkip = input.ClipSkip
|
||||
}
|
||||
|
||||
if input.ModelBaseName != "" {
|
||||
config.AutoGPTQ.ModelBaseName = input.ModelBaseName
|
||||
}
|
||||
|
||||
if input.NegativePromptScale != 0 {
|
||||
config.NegativePromptScale = input.NegativePromptScale
|
||||
}
|
||||
|
||||
if input.UseFastTokenizer {
|
||||
config.UseFastTokenizer = input.UseFastTokenizer
|
||||
}
|
||||
|
||||
if input.NegativePrompt != "" {
|
||||
config.NegativePrompt = input.NegativePrompt
|
||||
}
|
||||
|
||||
if input.RopeFreqBase != 0 {
|
||||
config.RopeFreqBase = input.RopeFreqBase
|
||||
}
|
||||
|
||||
if input.RopeFreqScale != 0 {
|
||||
config.RopeFreqScale = input.RopeFreqScale
|
||||
}
|
||||
|
||||
if input.Grammar != "" {
|
||||
config.Grammar = input.Grammar
|
||||
}
|
||||
|
||||
if input.Temperature != nil {
|
||||
config.Temperature = input.Temperature
|
||||
}
|
||||
|
||||
if input.Maxtokens != nil {
|
||||
config.Maxtokens = input.Maxtokens
|
||||
}
|
||||
|
||||
if input.ResponseFormat != nil {
|
||||
switch responseFormat := input.ResponseFormat.(type) {
|
||||
case string:
|
||||
config.ResponseFormat = responseFormat
|
||||
case map[string]interface{}:
|
||||
config.ResponseFormatMap = responseFormat
|
||||
}
|
||||
}
|
||||
|
||||
switch stop := input.Stop.(type) {
|
||||
case string:
|
||||
if stop != "" {
|
||||
config.StopWords = append(config.StopWords, stop)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range stop {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.StopWords = append(config.StopWords, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(input.Tools) > 0 {
|
||||
for _, tool := range input.Tools {
|
||||
input.Functions = append(input.Functions, tool.Function)
|
||||
}
|
||||
}
|
||||
|
||||
if input.ToolsChoice != nil {
|
||||
var toolChoice functions.Tool
|
||||
|
||||
switch content := input.ToolsChoice.(type) {
|
||||
case string:
|
||||
_ = json.Unmarshal([]byte(content), &toolChoice)
|
||||
case map[string]interface{}:
|
||||
dat, _ := json.Marshal(content)
|
||||
_ = json.Unmarshal(dat, &toolChoice)
|
||||
}
|
||||
input.FunctionCall = map[string]interface{}{
|
||||
"name": toolChoice.Function.Name,
|
||||
}
|
||||
}
|
||||
|
||||
// Decode each request's message content
|
||||
imgIndex, vidIndex, audioIndex := 0, 0, 0
|
||||
for i, m := range input.Messages {
|
||||
nrOfImgsInMessage := 0
|
||||
nrOfVideosInMessage := 0
|
||||
nrOfAudiosInMessage := 0
|
||||
|
||||
switch content := m.Content.(type) {
|
||||
case string:
|
||||
input.Messages[i].StringContent = content
|
||||
case []interface{}:
|
||||
dat, _ := json.Marshal(content)
|
||||
c := []schema.Content{}
|
||||
json.Unmarshal(dat, &c)
|
||||
|
||||
textContent := ""
|
||||
// we will template this at the end
|
||||
|
||||
CONTENT:
|
||||
for _, pp := range c {
|
||||
switch pp.Type {
|
||||
case "text":
|
||||
textContent += pp.Text
|
||||
//input.Messages[i].StringContent = pp.Text
|
||||
case "video", "video_url":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed encoding video: %s", err)
|
||||
continue CONTENT
|
||||
}
|
||||
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
|
||||
vidIndex++
|
||||
nrOfVideosInMessage++
|
||||
case "audio_url", "audio":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed encoding image: %s", err)
|
||||
continue CONTENT
|
||||
}
|
||||
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
|
||||
audioIndex++
|
||||
nrOfAudiosInMessage++
|
||||
case "image_url", "image":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed encoding image: %s", err)
|
||||
continue CONTENT
|
||||
}
|
||||
|
||||
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
||||
|
||||
imgIndex++
|
||||
nrOfImgsInMessage++
|
||||
}
|
||||
}
|
||||
|
||||
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(config.TemplateConfig.Multimodal, templates.MultiModalOptions{
|
||||
TotalImages: imgIndex,
|
||||
TotalVideos: vidIndex,
|
||||
TotalAudios: audioIndex,
|
||||
ImagesInMessage: nrOfImgsInMessage,
|
||||
VideosInMessage: nrOfVideosInMessage,
|
||||
AudiosInMessage: nrOfAudiosInMessage,
|
||||
}, textContent)
|
||||
}
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
config.RepeatPenalty = input.RepeatPenalty
|
||||
}
|
||||
|
||||
if input.FrequencyPenalty != 0 {
|
||||
config.FrequencyPenalty = input.FrequencyPenalty
|
||||
}
|
||||
|
||||
if input.PresencePenalty != 0 {
|
||||
config.PresencePenalty = input.PresencePenalty
|
||||
}
|
||||
|
||||
if input.Keep != 0 {
|
||||
config.Keep = input.Keep
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
config.Batch = input.Batch
|
||||
}
|
||||
|
||||
if input.IgnoreEOS {
|
||||
config.IgnoreEOS = input.IgnoreEOS
|
||||
}
|
||||
|
||||
if input.Seed != nil {
|
||||
config.Seed = input.Seed
|
||||
}
|
||||
|
||||
if input.TypicalP != nil {
|
||||
config.TypicalP = input.TypicalP
|
||||
}
|
||||
|
||||
switch inputs := input.Input.(type) {
|
||||
case string:
|
||||
if inputs != "" {
|
||||
config.InputStrings = append(config.InputStrings, inputs)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range inputs {
|
||||
switch i := pp.(type) {
|
||||
case string:
|
||||
config.InputStrings = append(config.InputStrings, i)
|
||||
case []interface{}:
|
||||
tokens := []int{}
|
||||
for _, ii := range i {
|
||||
tokens = append(tokens, int(ii.(float64)))
|
||||
}
|
||||
config.InputToken = append(config.InputToken, tokens)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Can be either a string or an object
|
||||
switch fnc := input.FunctionCall.(type) {
|
||||
case string:
|
||||
if fnc != "" {
|
||||
config.SetFunctionCallString(fnc)
|
||||
}
|
||||
case map[string]interface{}:
|
||||
var name string
|
||||
n, exists := fnc["name"]
|
||||
if exists {
|
||||
nn, e := n.(string)
|
||||
if e {
|
||||
name = nn
|
||||
}
|
||||
}
|
||||
config.SetFunctionCallNameString(name)
|
||||
}
|
||||
|
||||
switch p := input.Prompt.(type) {
|
||||
case string:
|
||||
config.PromptStrings = append(config.PromptStrings, p)
|
||||
case []interface{}:
|
||||
for _, pp := range p {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.PromptStrings = append(config.PromptStrings, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If a quality was defined as number, convert it to step
|
||||
if input.Quality != "" {
|
||||
q, err := strconv.Atoi(input.Quality)
|
||||
if err == nil {
|
||||
config.Step = q
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {
|
||||
cfg, err := cm.LoadBackendConfigFileByName(modelFile, loader.ModelPath,
|
||||
config.LoadOptionDebug(debug),
|
||||
config.LoadOptionThreads(threads),
|
||||
config.LoadOptionContextSize(ctx),
|
||||
config.LoadOptionF16(f16),
|
||||
)
|
||||
|
||||
// Set the parameters for the language model prediction
|
||||
updateRequestConfig(cfg, input)
|
||||
|
||||
if !cfg.Validate() {
|
||||
return nil, nil, fmt.Errorf("failed to validate config")
|
||||
}
|
||||
|
||||
return cfg, input, err
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
@@ -9,8 +10,6 @@ import (
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
@@ -26,16 +25,15 @@ import (
|
||||
// @Router /v1/audio/transcriptions [post]
|
||||
func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
m, input, err := readRequest(c, cl, ml, appConfig, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
return fiber.ErrBadRequest
|
||||
config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request: %w", err)
|
||||
}
|
||||
|
||||
// retrieve the file data from the request
|
||||
file, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
|
||||
@@ -4,26 +4,17 @@ import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/elevenlabs"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func RegisterElevenLabsRoutes(app *fiber.App,
|
||||
re *middleware.RequestExtractor,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig) {
|
||||
|
||||
// Elevenlabs
|
||||
app.Post("/v1/text-to-speech/:voice-id",
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.ElevenLabsTTSRequest) }),
|
||||
elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/text-to-speech/:voice-id", elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
||||
|
||||
app.Post("/v1/sound-generation",
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_SOUND_GENERATION)),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.ElevenLabsSoundGenerationRequest) }),
|
||||
elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/sound-generation", elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
|
||||
|
||||
}
|
||||
|
||||
@@ -3,22 +3,16 @@ package routes
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/jina"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func RegisterJINARoutes(app *fiber.App,
|
||||
re *middleware.RequestExtractor,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig) {
|
||||
|
||||
// POST endpoint to mimic the reranking
|
||||
app.Post("/v1/rerank",
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_RERANK)),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.JINARerankRequest) }),
|
||||
jina.JINARerankEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig))
|
||||
}
|
||||
|
||||
@@ -5,16 +5,13 @@ import (
|
||||
"github.com/gofiber/swagger"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/internal"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func RegisterLocalAIRoutes(router *fiber.App,
|
||||
requestExtractor *middleware.RequestExtractor,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
@@ -36,18 +33,8 @@ func RegisterLocalAIRoutes(router *fiber.App,
|
||||
router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
|
||||
}
|
||||
|
||||
router.Post("/tts",
|
||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
|
||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TTSRequest) }),
|
||||
localai.TTSEndpoint(cl, ml, appConfig))
|
||||
|
||||
vadChain := []fiber.Handler{
|
||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_VAD)),
|
||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.VADRequest) }),
|
||||
localai.VADEndpoint(cl, ml, appConfig),
|
||||
}
|
||||
router.Post("/vad", vadChain...)
|
||||
router.Post("/v1/vad", vadChain...)
|
||||
router.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
|
||||
router.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))
|
||||
|
||||
// Stores
|
||||
sl := model.NewModelLoader("")
|
||||
@@ -60,14 +47,10 @@ func RegisterLocalAIRoutes(router *fiber.App,
|
||||
router.Get("/metrics", localai.LocalAIMetricsEndpoint())
|
||||
}
|
||||
|
||||
// Backend Statistics Module
|
||||
// TODO: Should these use standard middlewares? Refactor later, they are extremely simple.
|
||||
// Experimental Backend Statistics Module
|
||||
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
|
||||
router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
|
||||
router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
|
||||
// The v1/* urls are exactly the same as above - makes local e2e testing easier if they are registered.
|
||||
router.Get("/v1/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
|
||||
router.Post("/v1/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
|
||||
|
||||
// p2p
|
||||
if p2p.IsP2PEnabled() {
|
||||
@@ -84,9 +67,6 @@ func RegisterLocalAIRoutes(router *fiber.App,
|
||||
router.Get("/system", localai.SystemInformations(ml, appConfig))
|
||||
|
||||
// misc
|
||||
router.Post("/v1/tokenize",
|
||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TOKENIZE)),
|
||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }),
|
||||
localai.TokenizeEndpoint(cl, ml, appConfig))
|
||||
router.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
|
||||
|
||||
}
|
||||
|
||||
@@ -3,50 +3,51 @@ package routes
|
||||
import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/application"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/openai"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
)
|
||||
|
||||
func RegisterOpenAIRoutes(app *fiber.App,
|
||||
re *middleware.RequestExtractor,
|
||||
application *application.Application) {
|
||||
// openAI compatible API endpoint
|
||||
|
||||
// chat
|
||||
chatChain := []fiber.Handler{
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
re.SetOpenAIRequest,
|
||||
openai.ChatEndpoint(application.BackendLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()),
|
||||
}
|
||||
app.Post("/v1/chat/completions", chatChain...)
|
||||
app.Post("/chat/completions", chatChain...)
|
||||
app.Post("/v1/chat/completions",
|
||||
openai.ChatEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
app.Post("/chat/completions",
|
||||
openai.ChatEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
// edit
|
||||
editChain := []fiber.Handler{
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EDIT)),
|
||||
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
re.SetOpenAIRequest,
|
||||
openai.EditEndpoint(application.BackendLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()),
|
||||
}
|
||||
app.Post("/v1/edits", editChain...)
|
||||
app.Post("/edits", editChain...)
|
||||
app.Post("/v1/edits",
|
||||
openai.EditEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
// completion
|
||||
completionChain := []fiber.Handler{
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_COMPLETION)),
|
||||
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
re.SetOpenAIRequest,
|
||||
openai.CompletionEndpoint(application.BackendLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()),
|
||||
}
|
||||
app.Post("/v1/completions", completionChain...)
|
||||
app.Post("/completions", completionChain...)
|
||||
app.Post("/v1/engines/:model/completions", completionChain...)
|
||||
app.Post("/edits",
|
||||
openai.EditEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
// assistant
|
||||
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
@@ -80,37 +81,45 @@ func RegisterOpenAIRoutes(app *fiber.App,
|
||||
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
|
||||
// embeddings
|
||||
embeddingChain := []fiber.Handler{
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EMBEDDINGS)),
|
||||
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
re.SetOpenAIRequest,
|
||||
openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()),
|
||||
}
|
||||
app.Post("/v1/embeddings", embeddingChain...)
|
||||
app.Post("/embeddings", embeddingChain...)
|
||||
app.Post("/v1/engines/:model/embeddings", embeddingChain...)
|
||||
|
||||
// audio
|
||||
app.Post("/v1/audio/transcriptions",
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT)),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
re.SetOpenAIRequest,
|
||||
openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()),
|
||||
// completion
|
||||
app.Post("/v1/completions",
|
||||
openai.CompletionEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
app.Post("/v1/audio/speech",
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TTSRequest) }),
|
||||
localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/completions",
|
||||
openai.CompletionEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
app.Post("/v1/engines/:model/completions",
|
||||
openai.CompletionEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
// embeddings
|
||||
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
|
||||
// audio
|
||||
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/v1/audio/speech", localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
|
||||
// images
|
||||
app.Post("/v1/images/generations",
|
||||
re.BuildConstantDefaultModelNameMiddleware("stablediffusion"),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
re.SetOpenAIRequest,
|
||||
openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/v1/images/generations", openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
|
||||
if application.ApplicationConfig().ImageDir != "" {
|
||||
app.Static("/generated-images", application.ApplicationConfig().ImageDir)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
<footer class="text-center py-8">
|
||||
LocalAI Version {{.Version}}<br>
|
||||
<a href='https://github.com/mudler/LocalAI' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2025 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
|
||||
<a href='https://github.com/mudler/LocalAI' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2024 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
|
||||
</footer>
|
||||
<script src="static/assets/tw-elements.js"></script>
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user