mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-02 18:53:32 -05:00
Compare commits
119 Commits
speculativ
...
v2.26.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
09941c0bfb | ||
|
|
cabe0f4993 | ||
|
|
1977c7f190 | ||
|
|
061e7c4eae | ||
|
|
5313e660f6 | ||
|
|
9e32fda304 | ||
|
|
83202cae54 | ||
|
|
d96addfa9d | ||
|
|
a715fe588d | ||
|
|
2ac4a86bb4 | ||
|
|
8670d480a6 | ||
|
|
af0b4ff237 | ||
|
|
e694764065 | ||
|
|
f3c27e0381 | ||
|
|
bf44319d0d | ||
|
|
5b133a640b | ||
|
|
0030a3fe75 | ||
|
|
0a748b009e | ||
|
|
257e951def | ||
|
|
fbd82a2dd0 | ||
|
|
5db321dad2 | ||
|
|
f5638a6354 | ||
|
|
5f64cc6328 | ||
|
|
28b10e8804 | ||
|
|
3277f5095d | ||
|
|
fe3ced2919 | ||
|
|
45e37a07bb | ||
|
|
e57b750ca3 | ||
|
|
49df492268 | ||
|
|
516cd660f1 | ||
|
|
8fd3ace9a1 | ||
|
|
099469cb05 | ||
|
|
6be8c0c618 | ||
|
|
3cddf24747 | ||
|
|
c330360785 | ||
|
|
8cd51570e5 | ||
|
|
0e7aa5cd15 | ||
|
|
e06a5f49de | ||
|
|
fb2f847507 | ||
|
|
e01acc88c9 | ||
|
|
7a5912908a | ||
|
|
4b1b942a7f | ||
|
|
230fe0098f | ||
|
|
cc163429dc | ||
|
|
f670e0a91c | ||
|
|
731674eee7 | ||
|
|
cc1f6f913f | ||
|
|
7f90ff7aec | ||
|
|
8d45670e41 | ||
|
|
e4b8ddb6a1 | ||
|
|
a801561f81 | ||
|
|
16ced07102 | ||
|
|
d35595372d | ||
|
|
81be192279 | ||
|
|
28a1310890 | ||
|
|
2a702e9ca4 | ||
|
|
3ecaea1b6e | ||
|
|
7daf5ac3e3 | ||
|
|
7bc80c17f8 | ||
|
|
1996ceb293 | ||
|
|
0bc3dc43da | ||
|
|
3324c4e6cb | ||
|
|
7329db4e78 | ||
|
|
464686aee6 | ||
|
|
bfa3d4ccff | ||
|
|
6a91288c8c | ||
|
|
96cb407ee0 | ||
|
|
5a19094d3a | ||
|
|
e3b943ffcb | ||
|
|
df30d6a482 | ||
|
|
c3c27b7e3d | ||
|
|
431716d4d6 | ||
|
|
d290fd159f | ||
|
|
051faaf771 | ||
|
|
41a2dfb0d9 | ||
|
|
ed0094c3d0 | ||
|
|
52fadeded1 | ||
|
|
a37fa8d9c4 | ||
|
|
03974a4dd4 | ||
|
|
1d6afbd65d | ||
|
|
d79f02ea09 | ||
|
|
ba2f426e3e | ||
|
|
732042e5c6 | ||
|
|
f1763aabf2 | ||
|
|
e0d90b173b | ||
|
|
ff07612bfa | ||
|
|
7badaf78a0 | ||
|
|
af41436f1b | ||
|
|
cd5489ce47 | ||
|
|
60ec2cf751 | ||
|
|
244f4b564f | ||
|
|
f1d6d65417 | ||
|
|
72e52c4f6a | ||
|
|
1656e1a88e | ||
|
|
7f62b418a4 | ||
|
|
1f4e66d638 | ||
|
|
a37b2c765c | ||
|
|
b4b67e00bd | ||
|
|
91e1ff5a95 | ||
|
|
d9204ea3b5 | ||
|
|
3d0fbcb4f7 | ||
|
|
03f3df9a82 | ||
|
|
fff35d5528 | ||
|
|
539e94db73 | ||
|
|
0f4f62cf3c | ||
|
|
e7cffd7afa | ||
|
|
26d790a2b6 | ||
|
|
5cf838c08d | ||
|
|
4db8f5cbce | ||
|
|
3b6b37a81b | ||
|
|
8f5aa2d9de | ||
|
|
a6bc8aa7c7 | ||
|
|
4ab107bc1a | ||
|
|
4c3710a531 | ||
|
|
901b06284a | ||
|
|
8eef5a2c5e | ||
|
|
e9cace137b | ||
|
|
9409c99738 | ||
|
|
4d44ebc2f2 |
@@ -1,23 +0,0 @@
|
||||
meta {
|
||||
name: musicgen
|
||||
type: http
|
||||
seq: 1
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/sound-generation
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model_id": "facebook/musicgen-small",
|
||||
"text": "Exciting 80s Newscast Interstitial",
|
||||
"duration_seconds": 8
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
meta {
|
||||
name: backend monitor
|
||||
type: http
|
||||
seq: 4
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/monitor
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}"
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
meta {
|
||||
name: backend-shutdown
|
||||
type: http
|
||||
seq: 3
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/shutdown
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}"
|
||||
}
|
||||
}
|
||||
@@ -1,5 +0,0 @@
|
||||
{
|
||||
"version": "1",
|
||||
"name": "LocalAI Test Requests",
|
||||
"type": "collection"
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
vars {
|
||||
HOST: localhost
|
||||
PORT: 8080
|
||||
DEFAULT_MODEL: gpt-3.5-turbo
|
||||
PROTOCOL: http://
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
meta {
|
||||
name: get models list
|
||||
type: http
|
||||
seq: 2
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
meta {
|
||||
name: Generate image
|
||||
type: http
|
||||
seq: 1
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/images/generations
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"prompt": "<positive prompt>|<negative prompt>",
|
||||
"model": "model-name",
|
||||
"step": 51,
|
||||
"size": "1024x1024",
|
||||
"image": ""
|
||||
}
|
||||
}
|
||||
@@ -1,24 +0,0 @@
|
||||
meta {
|
||||
name: -completions
|
||||
type: http
|
||||
seq: 4
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"prompt": "function downloadFile(string url, string outputPath) {",
|
||||
"max_tokens": 256,
|
||||
"temperature": 0.5
|
||||
}
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
meta {
|
||||
name: -edits
|
||||
type: http
|
||||
seq: 5
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/edits
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"input": "What day of the wek is it?",
|
||||
"instruction": "Fix the spelling mistakes"
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
meta {
|
||||
name: -embeddings
|
||||
type: http
|
||||
seq: 6
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/embeddings
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
meta {
|
||||
name: chat completion -simple- 1 message-
|
||||
type: http
|
||||
seq: 4
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "How could one use friction to cook an egg?"
|
||||
}
|
||||
],
|
||||
"max_tokens": 256,
|
||||
"temperature": 0.2,
|
||||
"grammar": ""
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
meta {
|
||||
name: chat-completions -long-
|
||||
type: http
|
||||
seq: 5
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"messages": [{"role": "system", "content": "You are a helpful, intelligent pirate assistant. Always answer as helpfully as possible, while being a pirate and using pirate language. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."},
|
||||
{"role": "user", "content": "How could one use electricity to cook an egg?"},
|
||||
{"role": "assistant",
|
||||
"content": "Shiver me timbers! Using electricity to cook an egg? Well, matey, I reckon that be a right curious idea! *adjusts spectacles* Now, I ain't sure if this be possible, but I'll do me best to help ye out.\n\nFirst things first, ye gotta understand that electricity be a powerful force, and it can't just cook an egg on its own. Ye see, electricity be like a mighty wind that can make things happen, but it needs somethin' to work with. So, if ye want to use electricity to cook an egg, ye gotta find a way to harness that power and make it do the cookin'.\n\nNow, I know what ye might be thinkin': \"How do I harness the power of electricity to cook an egg?\" Well, matey, there be a few ways to do it. One way be to use a special device called an \"electric frying pan.\" This be a pan that has a built-in heating element that gets hot when ye plug it into a wall socket. When the element gets hot, ye can crack an egg into the pan and watch as it cook"
|
||||
},
|
||||
{"role": "user", "content": "I don't have one of those, just a raw wire and plenty of power! How do we get it done?"}],
|
||||
"max_tokens": 1024,
|
||||
"temperature": 0.5
|
||||
}
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
meta {
|
||||
name: chat-completions -stream-
|
||||
type: http
|
||||
seq: 6
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"messages": [{"role": "user", "content": "Explain how I can set sail on the ocean using only power generated by seagulls?"}],
|
||||
"max_tokens": 256,
|
||||
"temperature": 0.9,
|
||||
"stream": true
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
meta {
|
||||
name: add model gallery
|
||||
type: http
|
||||
seq: 10
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"url": "file:///home/dave/projects/model-gallery/huggingface/TheBloke__CodeLlama-7B-Instruct-GGML.yaml",
|
||||
"name": "test"
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
meta {
|
||||
name: delete model gallery
|
||||
type: http
|
||||
seq: 11
|
||||
}
|
||||
|
||||
delete {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"name": "test"
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
meta {
|
||||
name: list MODELS in galleries
|
||||
type: http
|
||||
seq: 7
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/available
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
meta {
|
||||
name: list model GALLERIES
|
||||
type: http
|
||||
seq: 8
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
meta {
|
||||
name: model delete
|
||||
type: http
|
||||
seq: 7
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
meta {
|
||||
name: model gallery apply -gist-
|
||||
type: http
|
||||
seq: 12
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"id": "TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q2_K.bin"
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
meta {
|
||||
name: model gallery apply
|
||||
type: http
|
||||
seq: 9
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"id": "dave@TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q3_K_S.bin",
|
||||
"name": "codellama7b"
|
||||
}
|
||||
}
|
||||
Binary file not shown.
@@ -1,16 +0,0 @@
|
||||
meta {
|
||||
name: transcribe
|
||||
type: http
|
||||
seq: 1
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
|
||||
body: multipartForm
|
||||
auth: none
|
||||
}
|
||||
|
||||
body:multipart-form {
|
||||
file: @file(transcription/gb1.ogg)
|
||||
model: whisper-1
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
meta {
|
||||
name: -tts
|
||||
type: http
|
||||
seq: 2
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||
}
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
meta {
|
||||
name: musicgen
|
||||
type: http
|
||||
seq: 2
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"backend": "transformers",
|
||||
"model": "facebook/musicgen-small",
|
||||
"input": "80s Synths playing Jazz"
|
||||
}
|
||||
}
|
||||
2
.github/labeler.yml
vendored
2
.github/labeler.yml
vendored
@@ -1,4 +1,4 @@
|
||||
enhancements:
|
||||
enhancement:
|
||||
- head-branch: ['^feature', 'feature']
|
||||
|
||||
dependencies:
|
||||
|
||||
2
.github/workflows/dependabot_auto.yml
vendored
2
.github/workflows/dependabot_auto.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
steps:
|
||||
- name: Dependabot metadata
|
||||
id: metadata
|
||||
uses: dependabot/fetch-metadata@v2.2.0
|
||||
uses: dependabot/fetch-metadata@v2.3.0
|
||||
with:
|
||||
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||
skip-commit-verification: true
|
||||
|
||||
9
.github/workflows/generate_grpc_cache.yaml
vendored
9
.github/workflows/generate_grpc_cache.yaml
vendored
@@ -2,9 +2,10 @@ name: 'generate and publish GRPC docker caches'
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
schedule:
|
||||
# daily at midnight
|
||||
- cron: '0 0 * * *'
|
||||
|
||||
concurrency:
|
||||
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||
@@ -16,7 +17,7 @@ jobs:
|
||||
matrix:
|
||||
include:
|
||||
- grpc-base-image: ubuntu:22.04
|
||||
runs-on: 'ubuntu-latest'
|
||||
runs-on: 'arc-runner-set'
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
runs-on: ${{matrix.runs-on}}
|
||||
steps:
|
||||
|
||||
4
.github/workflows/notify-models.yaml
vendored
4
.github/workflows/notify-models.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
with:
|
||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||
# Check the PR diff using the current branch and the base branch of the PR
|
||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
||||
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||
id: git-diff-action
|
||||
with:
|
||||
json_diff_file_output: diff.json
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
||||
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
||||
# Check the PR diff using the current branch and the base branch of the PR
|
||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
||||
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||
id: git-diff-action
|
||||
with:
|
||||
json_diff_file_output: diff.json
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io)
|
||||
Copyright (c) 2023-2025 Ettore Di Giacinto (mudler@localai.io)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
44
Makefile
44
Makefile
@@ -6,9 +6,7 @@ BINARY_NAME=local-ai
|
||||
DETECT_LIBS?=true
|
||||
|
||||
# llama.cpp versions
|
||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=6152129d05870cb38162c422c6ba80434e021e9f
|
||||
CPPLLAMA_VERSION?=300907b2110cc17b4337334dc397e05de2d8f5e0
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||
@@ -24,7 +22,7 @@ BARKCPP_VERSION?=v1.0.0
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024
|
||||
STABLEDIFFUSION_GGML_VERSION?=d46ed5e184b97c2018dc2e8105925bdb8775e02c
|
||||
|
||||
ONNX_VERSION?=1.20.0
|
||||
ONNX_ARCH?=x64
|
||||
@@ -151,7 +149,6 @@ ifeq ($(BUILD_TYPE),hipblas)
|
||||
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
# llama-ggml has no hipblas support, so override it here.
|
||||
export STABLE_BUILD_TYPE=
|
||||
export GGML_HIP=1
|
||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
||||
@@ -188,7 +185,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||
@@ -222,19 +218,6 @@ endif
|
||||
|
||||
all: help
|
||||
|
||||
## go-llama.cpp
|
||||
sources/go-llama.cpp:
|
||||
mkdir -p sources/go-llama.cpp
|
||||
cd sources/go-llama.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(GOLLAMA_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(GOLLAMA_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
|
||||
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||
|
||||
## bark.cpp
|
||||
sources/bark.cpp:
|
||||
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
||||
@@ -310,19 +293,17 @@ sources/whisper.cpp:
|
||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||
|
||||
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||
|
||||
dropreplace:
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
||||
|
||||
prepare-sources: get-sources replace
|
||||
$(GOCMD) mod download
|
||||
@@ -330,7 +311,6 @@ prepare-sources: get-sources replace
|
||||
## GENERIC
|
||||
rebuild: ## Rebuilds the project
|
||||
$(GOCMD) clean -cache
|
||||
$(MAKE) -C sources/go-llama.cpp clean
|
||||
$(MAKE) -C sources/whisper.cpp clean
|
||||
$(MAKE) -C sources/go-piper clean
|
||||
$(MAKE) build
|
||||
@@ -434,7 +414,7 @@ run: prepare ## run local-ai
|
||||
test-models/testmodel.ggml:
|
||||
mkdir test-models
|
||||
mkdir test-dir
|
||||
wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
|
||||
wget -q https://huggingface.co/RichardErkhov/Qwen_-_Qwen2-1.5B-Instruct-gguf/resolve/main/Qwen2-1.5B-Instruct.Q2_K.gguf -O test-models/testmodel.ggml
|
||||
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||
@@ -449,8 +429,7 @@ test: prepare test-models/testmodel.ggml grpcs
|
||||
export GO_TAGS="tts debug"
|
||||
$(MAKE) prepare-test
|
||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||
$(MAKE) test-llama
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||
$(MAKE) test-llama-gguf
|
||||
$(MAKE) test-tts
|
||||
$(MAKE) test-stablediffusion
|
||||
@@ -479,10 +458,6 @@ teardown-e2e:
|
||||
rm -rf $(TEST_DIR) || true
|
||||
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||
|
||||
test-llama: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||
|
||||
test-llama-gguf: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||
@@ -760,13 +735,6 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
||||
mkdir -p backend-assets/util/
|
||||
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
||||
|
||||
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/llama-ggml
|
||||
endif
|
||||
|
||||
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
|
||||
@@ -861,7 +829,7 @@ swagger:
|
||||
|
||||
.PHONY: gen-assets
|
||||
gen-assets:
|
||||
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
|
||||
$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets
|
||||
|
||||
## Documentation
|
||||
docs/layouts/_default:
|
||||
|
||||
8
aio/cpu/vad.yaml
Normal file
8
aio/cpu/vad.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
backend: silero-vad
|
||||
name: silero-vad
|
||||
parameters:
|
||||
model: silero-vad.onnx
|
||||
download_files:
|
||||
- filename: silero-vad.onnx
|
||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||
@@ -129,7 +129,7 @@ detect_gpu
|
||||
detect_gpu_size
|
||||
|
||||
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
|
||||
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
|
||||
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"
|
||||
|
||||
check_vars
|
||||
|
||||
|
||||
8
aio/gpu-8g/vad.yaml
Normal file
8
aio/gpu-8g/vad.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
backend: silero-vad
|
||||
name: silero-vad
|
||||
parameters:
|
||||
model: silero-vad.onnx
|
||||
download_files:
|
||||
- filename: silero-vad.onnx
|
||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||
8
aio/intel/vad.yaml
Normal file
8
aio/intel/vad.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
backend: silero-vad
|
||||
name: silero-vad
|
||||
parameters:
|
||||
model: silero-vad.onnx
|
||||
download_files:
|
||||
- filename: silero-vad.onnx
|
||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||
@@ -163,6 +163,11 @@ message Reply {
|
||||
double timing_token_generation = 5;
|
||||
}
|
||||
|
||||
message GrammarTrigger {
|
||||
string word = 1;
|
||||
bool at_start = 2;
|
||||
}
|
||||
|
||||
message ModelOptions {
|
||||
string Model = 1;
|
||||
int32 ContextSize = 2;
|
||||
@@ -247,6 +252,8 @@ message ModelOptions {
|
||||
|
||||
string CacheTypeKey = 63;
|
||||
string CacheTypeValue = 64;
|
||||
|
||||
repeated GrammarTrigger GrammarTriggers = 65;
|
||||
}
|
||||
|
||||
message Result {
|
||||
|
||||
@@ -468,6 +468,9 @@ struct llama_server_context
|
||||
bool add_bos_token = true;
|
||||
bool has_eos_token = true;
|
||||
|
||||
bool grammar_lazy = false;
|
||||
std::vector<common_grammar_trigger> grammar_trigger_words;
|
||||
|
||||
int32_t n_ctx; // total context for all clients / slots
|
||||
|
||||
// system prompt
|
||||
@@ -706,6 +709,8 @@ struct llama_server_context
|
||||
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||
slot->sparams.grammar_trigger_words = grammar_trigger_words;
|
||||
slot->sparams.grammar_lazy = grammar_lazy;
|
||||
|
||||
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
|
||||
// Might be better to reject the request with a 400 ?
|
||||
@@ -1150,6 +1155,14 @@ struct llama_server_context
|
||||
slot.has_next_token = false;
|
||||
}
|
||||
|
||||
if (slot.n_past >= slot.n_ctx) {
|
||||
slot.truncated = true;
|
||||
slot.stopped_limit = true;
|
||||
slot.has_next_token = false;
|
||||
|
||||
LOG_VERBOSE("stopped due to running out of context capacity", {});
|
||||
}
|
||||
|
||||
if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok))
|
||||
{
|
||||
slot.stopped_eos = true;
|
||||
@@ -1622,17 +1635,17 @@ struct llama_server_context
|
||||
{
|
||||
if (slot.is_processing() && system_tokens.size() + slot.cache_tokens.size() >= (size_t) slot.n_ctx)
|
||||
{
|
||||
// this check is redundant (for good)
|
||||
// we should never get here, because generation should already stopped in process_token()
|
||||
|
||||
// START LOCALAI changes
|
||||
// Temporary disable context-shifting as it can lead to infinite loops (issue: https://github.com/ggerganov/llama.cpp/issues/3969)
|
||||
// See: https://github.com/mudler/LocalAI/issues/1333
|
||||
// Context is exhausted, release the slot
|
||||
slot.release();
|
||||
send_final_response(slot);
|
||||
slot.cache_tokens.clear();
|
||||
slot.n_past = 0;
|
||||
slot.truncated = false;
|
||||
slot.has_next_token = true;
|
||||
LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
||||
slot.has_next_token = false;
|
||||
LOG_ERROR("context is exhausted, release the slot", {});
|
||||
|
||||
continue;
|
||||
// END LOCALAI changes
|
||||
@@ -2374,6 +2387,21 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
if ( request->ropefreqscale() != 0.0f ) {
|
||||
params.rope_freq_scale = request->ropefreqscale();
|
||||
}
|
||||
|
||||
if (request->grammartriggers_size() > 0) {
|
||||
LOG_INFO("configuring grammar triggers", {});
|
||||
llama.grammar_lazy = true;
|
||||
for (int i = 0; i < request->grammartriggers_size(); i++) {
|
||||
common_grammar_trigger trigger;
|
||||
trigger.word = request->grammartriggers(i).word();
|
||||
trigger.at_start = request->grammartriggers(i).at_start();
|
||||
llama.grammar_trigger_words.push_back(trigger);
|
||||
LOG_INFO("grammar trigger", {
|
||||
{ "word", trigger.word },
|
||||
{ "at_start", trigger.at_start }
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2522,6 +2550,18 @@ public:
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
|
||||
grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){
|
||||
json data = parse_options(false, request, llama);
|
||||
|
||||
std::vector<llama_token> tokens = llama.tokenize(data["prompt"],false);
|
||||
|
||||
for (int i=0 ; i< tokens.size(); i++){
|
||||
response->add_tokens(tokens[i]);
|
||||
}
|
||||
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
|
||||
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
|
||||
llama_client_slot* active_slot = llama.get_active_slot();
|
||||
|
||||
|
||||
@@ -1,204 +0,0 @@
|
||||
package main
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
base.SingleThread
|
||||
|
||||
llama *llama.LLama
|
||||
}
|
||||
|
||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||
ropeFreqBase := float32(10000)
|
||||
ropeFreqScale := float32(1)
|
||||
|
||||
if opts.RopeFreqBase != 0 {
|
||||
ropeFreqBase = opts.RopeFreqBase
|
||||
}
|
||||
if opts.RopeFreqScale != 0 {
|
||||
ropeFreqScale = opts.RopeFreqScale
|
||||
}
|
||||
|
||||
llamaOpts := []llama.ModelOption{
|
||||
llama.WithRopeFreqBase(ropeFreqBase),
|
||||
llama.WithRopeFreqScale(ropeFreqScale),
|
||||
}
|
||||
|
||||
if opts.NGQA != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
|
||||
}
|
||||
|
||||
if opts.RMSNormEps != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
|
||||
}
|
||||
|
||||
if opts.ContextSize != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
|
||||
}
|
||||
if opts.F16Memory {
|
||||
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
|
||||
}
|
||||
if opts.Embeddings {
|
||||
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
|
||||
}
|
||||
if opts.NGPULayers != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
|
||||
}
|
||||
|
||||
llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap))
|
||||
llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU))
|
||||
llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit))
|
||||
if opts.NBatch != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch)))
|
||||
} else {
|
||||
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
|
||||
}
|
||||
|
||||
if opts.NUMA {
|
||||
llamaOpts = append(llamaOpts, llama.EnableNUMA)
|
||||
}
|
||||
|
||||
if opts.LowVRAM {
|
||||
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
|
||||
}
|
||||
|
||||
model, err := llama.New(opts.ModelFile, llamaOpts...)
|
||||
llm.llama = model
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
|
||||
ropeFreqBase := float32(10000)
|
||||
ropeFreqScale := float32(1)
|
||||
|
||||
if opts.RopeFreqBase != 0 {
|
||||
ropeFreqBase = opts.RopeFreqBase
|
||||
}
|
||||
if opts.RopeFreqScale != 0 {
|
||||
ropeFreqScale = opts.RopeFreqScale
|
||||
}
|
||||
predictOptions := []llama.PredictOption{
|
||||
llama.SetTemperature(opts.Temperature),
|
||||
llama.SetTopP(opts.TopP),
|
||||
llama.SetTopK(int(opts.TopK)),
|
||||
llama.SetTokens(int(opts.Tokens)),
|
||||
llama.SetThreads(int(opts.Threads)),
|
||||
llama.WithGrammar(opts.Grammar),
|
||||
llama.SetRopeFreqBase(ropeFreqBase),
|
||||
llama.SetRopeFreqScale(ropeFreqScale),
|
||||
llama.SetNegativePromptScale(opts.NegativePromptScale),
|
||||
llama.SetNegativePrompt(opts.NegativePrompt),
|
||||
}
|
||||
|
||||
if opts.PromptCacheAll {
|
||||
predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
|
||||
}
|
||||
|
||||
if opts.PromptCacheRO {
|
||||
predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
|
||||
}
|
||||
|
||||
// Expected absolute path
|
||||
if opts.PromptCachePath != "" {
|
||||
predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
|
||||
}
|
||||
|
||||
if opts.Mirostat != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat)))
|
||||
}
|
||||
|
||||
if opts.MirostatETA != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
|
||||
}
|
||||
|
||||
if opts.MirostatTAU != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
|
||||
}
|
||||
|
||||
if opts.Debug {
|
||||
predictOptions = append(predictOptions, llama.Debug)
|
||||
}
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))
|
||||
|
||||
if opts.PresencePenalty != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
|
||||
}
|
||||
|
||||
if opts.NKeep != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep)))
|
||||
}
|
||||
|
||||
if opts.Batch != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch)))
|
||||
}
|
||||
|
||||
if opts.F16KV {
|
||||
predictOptions = append(predictOptions, llama.EnableF16KV)
|
||||
}
|
||||
|
||||
if opts.IgnoreEOS {
|
||||
predictOptions = append(predictOptions, llama.IgnoreEOS)
|
||||
}
|
||||
|
||||
if opts.Seed != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed)))
|
||||
}
|
||||
|
||||
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
|
||||
predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
|
||||
predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
|
||||
predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
|
||||
predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
|
||||
predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
|
||||
predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
|
||||
return predictOptions
|
||||
}
|
||||
|
||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
predictOptions := buildPredictOptions(opts)
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
|
||||
results <- token
|
||||
return true
|
||||
}))
|
||||
|
||||
go func() {
|
||||
_, err := llm.llama.Predict(opts.Prompt, predictOptions...)
|
||||
if err != nil {
|
||||
fmt.Println("err: ", err)
|
||||
}
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
predictOptions := buildPredictOptions(opts)
|
||||
|
||||
if len(opts.EmbeddingTokens) > 0 {
|
||||
tokens := []int{}
|
||||
for _, t := range opts.EmbeddingTokens {
|
||||
tokens = append(tokens, int(t))
|
||||
}
|
||||
return llm.llama.TokenEmbeddings(tokens, predictOptions...)
|
||||
}
|
||||
|
||||
return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
accelerate
|
||||
auto-gptq==0.7.1
|
||||
grpcio==1.69.0
|
||||
grpcio==1.70.0
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,4 +1,4 @@
|
||||
bark==0.1.5
|
||||
grpcio==1.69.0
|
||||
grpcio==1.70.0
|
||||
protobuf
|
||||
certifi
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.70.0
|
||||
protobuf
|
||||
grpcio-tools
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.70.0
|
||||
protobuf
|
||||
certifi
|
||||
packaging==24.1
|
||||
@@ -159,6 +159,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
torchType = torch.float16
|
||||
variant = "fp16"
|
||||
|
||||
options = request.Options
|
||||
|
||||
# empty dict
|
||||
self.options = {}
|
||||
|
||||
# The options are a list of strings in this form optname:optvalue
|
||||
# We are storing all the options in a dict so we can use it later when
|
||||
# generating the images
|
||||
for opt in options:
|
||||
key, value = opt.split(":")
|
||||
self.options[key] = value
|
||||
|
||||
local = False
|
||||
modelFile = request.Model
|
||||
|
||||
@@ -441,6 +453,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
|
||||
kwargs = {key: options.get(key) for key in keys if key in options}
|
||||
|
||||
# populate kwargs from self.options.
|
||||
kwargs.update(self.options)
|
||||
|
||||
# Set seed
|
||||
if request.seed > 0:
|
||||
kwargs["generator"] = torch.Generator(device=self.device).manual_seed(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
setuptools
|
||||
grpcio==1.69.0
|
||||
grpcio==1.70.0
|
||||
pillow
|
||||
protobuf
|
||||
certifi
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.70.0
|
||||
protobuf
|
||||
certifi
|
||||
wheel
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.70.0
|
||||
protobuf
|
||||
grpcio-tools
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.70.0
|
||||
protobuf
|
||||
phonemizer
|
||||
scipy
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.70.0
|
||||
protobuf
|
||||
certifi
|
||||
@@ -5,4 +5,4 @@ accelerate
|
||||
transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.3.1
|
||||
sentence-transformers==3.4.1
|
||||
@@ -6,4 +6,4 @@ accelerate
|
||||
transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.3.1
|
||||
sentence-transformers==3.4.1
|
||||
|
||||
@@ -5,4 +5,4 @@ numba==0.60.0
|
||||
transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.3.1
|
||||
sentence-transformers==3.4.1
|
||||
|
||||
@@ -7,4 +7,4 @@ numba==0.60.0
|
||||
bitsandbytes
|
||||
outetts
|
||||
bitsandbytes
|
||||
sentence-transformers==3.3.1
|
||||
sentence-transformers==3.4.1
|
||||
|
||||
@@ -8,4 +8,4 @@ numba==0.60.0
|
||||
intel-extension-for-transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.3.1
|
||||
sentence-transformers==3.4.1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.70.0
|
||||
protobuf
|
||||
certifi
|
||||
setuptools
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.70.0
|
||||
protobuf
|
||||
certifi
|
||||
setuptools
|
||||
@@ -62,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
}
|
||||
}
|
||||
|
||||
if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
|
||||
if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
|
||||
log.Error().Err(err).Msg("error installing models")
|
||||
}
|
||||
|
||||
@@ -145,13 +145,7 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
|
||||
if options.LoadToMemory != nil {
|
||||
for _, m := range options.LoadToMemory {
|
||||
cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath,
|
||||
config.LoadOptionDebug(options.Debug),
|
||||
config.LoadOptionThreads(options.Threads),
|
||||
config.LoadOptionContextSize(options.ContextSize),
|
||||
config.LoadOptionF16(options.F16),
|
||||
config.ModelPath(options.ModelPath),
|
||||
)
|
||||
cfg, err := application.BackendLoader().LoadBackendConfigFileByNameDefaultOptions(m, options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ type TokenUsage struct {
|
||||
TimingTokenGeneration float64
|
||||
}
|
||||
|
||||
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
modelFile := c.Model
|
||||
|
||||
// Check if the modelFile exists, if it doesn't try to load it from the gallery
|
||||
@@ -48,7 +48,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
||||
}
|
||||
}
|
||||
|
||||
opts := ModelOptions(c, o)
|
||||
opts := ModelOptions(*c, o)
|
||||
inferenceModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -84,7 +84,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
||||
|
||||
// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
|
||||
fn := func() (LLMResponse, error) {
|
||||
opts := gRPCPredictOpts(c, loader.ModelPath)
|
||||
opts := gRPCPredictOpts(*c, loader.ModelPath)
|
||||
opts.Prompt = s
|
||||
opts.Messages = protoMessages
|
||||
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
|
||||
|
||||
@@ -118,9 +118,19 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||
nGPULayers = *c.NGPULayers
|
||||
}
|
||||
|
||||
triggers := make([]*pb.GrammarTrigger, 0)
|
||||
for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers {
|
||||
triggers = append(triggers, &pb.GrammarTrigger{
|
||||
Word: t.Word,
|
||||
AtStart: t.AtStart,
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
return &pb.ModelOptions{
|
||||
CUDA: c.CUDA || c.Diffusers.CUDA,
|
||||
SchedulerType: c.Diffusers.SchedulerType,
|
||||
GrammarTriggers: triggers,
|
||||
PipelineType: c.Diffusers.PipelineType,
|
||||
CFGScale: c.CFGScale,
|
||||
LoraAdapter: c.LoraAdapter,
|
||||
|
||||
@@ -9,10 +9,10 @@ import (
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
||||
func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
rerankModel, err := loader.Load(opts...)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -13,7 +13,6 @@ import (
|
||||
)
|
||||
|
||||
func SoundGeneration(
|
||||
modelFile string,
|
||||
text string,
|
||||
duration *float32,
|
||||
temperature *float32,
|
||||
@@ -25,8 +24,9 @@ func SoundGeneration(
|
||||
backendConfig config.BackendConfig,
|
||||
) (string, *proto.Result, error) {
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
soundGenModel, err := loader.Load(opts...)
|
||||
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
@@ -44,7 +44,7 @@ func SoundGeneration(
|
||||
|
||||
res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
|
||||
Text: text,
|
||||
Model: modelFile,
|
||||
Model: backendConfig.Model,
|
||||
Dst: filePath,
|
||||
Sample: doSample,
|
||||
Duration: duration,
|
||||
|
||||
@@ -4,24 +4,17 @@ import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/grpc"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
|
||||
|
||||
modelFile := backendConfig.Model
|
||||
|
||||
var inferenceModel grpc.Backend
|
||||
var err error
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
inferenceModel, err = loader.Load(opts...)
|
||||
|
||||
if backendConfig.Backend == "" {
|
||||
inferenceModel, err = loader.Load(opts...)
|
||||
} else {
|
||||
opts = append(opts, model.WithBackendString(backendConfig.Backend))
|
||||
inferenceModel, err = loader.Load(opts...)
|
||||
}
|
||||
if err != nil {
|
||||
return schema.TokenizeResponse{}, err
|
||||
}
|
||||
@@ -35,6 +28,10 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
|
||||
return schema.TokenizeResponse{}, err
|
||||
}
|
||||
|
||||
if resp.Tokens == nil {
|
||||
resp.Tokens = make([]int32, 0)
|
||||
}
|
||||
|
||||
return schema.TokenizeResponse{
|
||||
Tokens: resp.Tokens,
|
||||
}, nil
|
||||
|
||||
@@ -47,7 +47,7 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
|
||||
tks = append(tks, int(t))
|
||||
}
|
||||
tr.Segments = append(tr.Segments,
|
||||
schema.Segment{
|
||||
schema.TranscriptionSegment{
|
||||
Text: s.Text,
|
||||
Id: int(s.Id),
|
||||
Start: time.Duration(s.Start),
|
||||
|
||||
@@ -14,28 +14,22 @@ import (
|
||||
)
|
||||
|
||||
func ModelTTS(
|
||||
backend,
|
||||
text,
|
||||
modelFile,
|
||||
voice,
|
||||
language string,
|
||||
loader *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
backendConfig config.BackendConfig,
|
||||
) (string, *proto.Result, error) {
|
||||
bb := backend
|
||||
if bb == "" {
|
||||
bb = model.PiperBackend
|
||||
}
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithBackendString(bb), model.WithModel(modelFile))
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithDefaultBackendString(model.PiperBackend))
|
||||
ttsModel, err := loader.Load(opts...)
|
||||
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
if ttsModel == nil {
|
||||
return "", nil, fmt.Errorf("could not load piper model")
|
||||
return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
|
||||
@@ -45,22 +39,21 @@ func ModelTTS(
|
||||
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
|
||||
filePath := filepath.Join(appConfig.AudioDir, fileName)
|
||||
|
||||
// If the model file is not empty, we pass it joined with the model path
|
||||
// We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect.
|
||||
// This should be addressed in a follow up PR soon.
|
||||
// Copying it over nearly verbatim, as TTS backends are not functional without this.
|
||||
modelPath := ""
|
||||
if modelFile != "" {
|
||||
// If the model file is not empty, we pass it joined with the model path
|
||||
// Checking first that it exists and is not outside ModelPath
|
||||
// TODO: we should actually first check if the modelFile is looking like
|
||||
// a FS path
|
||||
mp := filepath.Join(loader.ModelPath, modelFile)
|
||||
if _, err := os.Stat(mp); err == nil {
|
||||
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
modelPath = mp
|
||||
} else {
|
||||
modelPath = modelFile
|
||||
// Checking first that it exists and is not outside ModelPath
|
||||
// TODO: we should actually first check if the modelFile is looking like
|
||||
// a FS path
|
||||
mp := filepath.Join(loader.ModelPath, backendConfig.Model)
|
||||
if _, err := os.Stat(mp); err == nil {
|
||||
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
modelPath = mp
|
||||
} else {
|
||||
modelPath = backendConfig.Model // skip this step if it fails?????
|
||||
}
|
||||
|
||||
res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
|
||||
|
||||
38
core/backend/vad.go
Normal file
38
core/backend/vad.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func VAD(request *schema.VADRequest,
|
||||
ctx context.Context,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
backendConfig config.BackendConfig) (*schema.VADResponse, error) {
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
vadModel, err := ml.Load(opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req := proto.VADRequest{
|
||||
Audio: request.Audio,
|
||||
}
|
||||
resp, err := vadModel.VAD(ctx, &req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
segments := []schema.VADSegment{}
|
||||
for _, s := range resp.Segments {
|
||||
segments = append(segments, schema.VADSegment{Start: s.Start, End: s.End})
|
||||
}
|
||||
|
||||
return &schema.VADResponse{
|
||||
Segments: segments,
|
||||
}, nil
|
||||
}
|
||||
@@ -100,7 +100,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
|
||||
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
|
||||
}
|
||||
|
||||
err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
|
||||
err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -32,7 +32,6 @@ type RunCMD struct {
|
||||
|
||||
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
|
||||
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
|
||||
RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
|
||||
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
|
||||
Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
|
||||
PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
|
||||
@@ -90,7 +89,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
|
||||
config.WithF16(r.F16),
|
||||
config.WithStringGalleries(r.Galleries),
|
||||
config.WithModelLibraryURL(r.RemoteLibrary),
|
||||
config.WithCors(r.CORS),
|
||||
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
|
||||
config.WithCsrf(r.CSRF),
|
||||
|
||||
@@ -86,13 +86,14 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
|
||||
options := config.BackendConfig{}
|
||||
options.SetDefaults()
|
||||
options.Backend = t.Backend
|
||||
options.Model = t.Model
|
||||
|
||||
var inputFile *string
|
||||
if t.InputFile != "" {
|
||||
inputFile = &t.InputFile
|
||||
}
|
||||
|
||||
filePath, _, err := backend.SoundGeneration(t.Model, text,
|
||||
filePath, _, err := backend.SoundGeneration(text,
|
||||
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
|
||||
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
|
||||
|
||||
|
||||
@@ -52,8 +52,10 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
|
||||
|
||||
options := config.BackendConfig{}
|
||||
options.SetDefaults()
|
||||
options.Backend = t.Backend
|
||||
options.Model = t.Model
|
||||
|
||||
filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, t.Language, ml, opts, options)
|
||||
filePath, _, err := backend.ModelTTS(text, t.Voice, t.Language, ml, opts, options)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -44,8 +44,6 @@ type ApplicationConfig struct {
|
||||
DisableGalleryEndpoint bool
|
||||
LoadToMemory []string
|
||||
|
||||
ModelLibraryURL string
|
||||
|
||||
Galleries []Gallery
|
||||
|
||||
BackendAssets embed.FS
|
||||
@@ -126,12 +124,6 @@ func WithP2PToken(s string) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithModelLibraryURL(url string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ModelLibraryURL = url
|
||||
}
|
||||
}
|
||||
|
||||
func WithLibPath(path string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.LibPath = path
|
||||
|
||||
@@ -287,7 +287,8 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
defaultTopP := 0.95
|
||||
defaultTopK := 40
|
||||
defaultTemp := 0.9
|
||||
defaultMirostat := 2
|
||||
// https://github.com/mudler/LocalAI/issues/2780
|
||||
defaultMirostat := 0
|
||||
defaultMirostatTAU := 5.0
|
||||
defaultMirostatETA := 0.1
|
||||
defaultTypicalP := 1.0
|
||||
@@ -436,19 +437,21 @@ func (c *BackendConfig) HasTemplate() bool {
|
||||
type BackendConfigUsecases int
|
||||
|
||||
const (
|
||||
FLAG_ANY BackendConfigUsecases = 0b000000000
|
||||
FLAG_CHAT BackendConfigUsecases = 0b000000001
|
||||
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
|
||||
FLAG_EDIT BackendConfigUsecases = 0b000000100
|
||||
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
|
||||
FLAG_RERANK BackendConfigUsecases = 0b000010000
|
||||
FLAG_IMAGE BackendConfigUsecases = 0b000100000
|
||||
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
|
||||
FLAG_TTS BackendConfigUsecases = 0b010000000
|
||||
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
|
||||
FLAG_ANY BackendConfigUsecases = 0b00000000000
|
||||
FLAG_CHAT BackendConfigUsecases = 0b00000000001
|
||||
FLAG_COMPLETION BackendConfigUsecases = 0b00000000010
|
||||
FLAG_EDIT BackendConfigUsecases = 0b00000000100
|
||||
FLAG_EMBEDDINGS BackendConfigUsecases = 0b00000001000
|
||||
FLAG_RERANK BackendConfigUsecases = 0b00000010000
|
||||
FLAG_IMAGE BackendConfigUsecases = 0b00000100000
|
||||
FLAG_TRANSCRIPT BackendConfigUsecases = 0b00001000000
|
||||
FLAG_TTS BackendConfigUsecases = 0b00010000000
|
||||
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b00100000000
|
||||
FLAG_TOKENIZE BackendConfigUsecases = 0b01000000000
|
||||
FLAG_VAD BackendConfigUsecases = 0b10000000000
|
||||
|
||||
// Common Subsets
|
||||
FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT
|
||||
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
|
||||
)
|
||||
|
||||
func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
|
||||
@@ -463,6 +466,8 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
|
||||
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
|
||||
"FLAG_TTS": FLAG_TTS,
|
||||
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
|
||||
"FLAG_TOKENIZE": FLAG_TOKENIZE,
|
||||
"FLAG_VAD": FLAG_VAD,
|
||||
"FLAG_LLM": FLAG_LLM,
|
||||
}
|
||||
}
|
||||
@@ -548,5 +553,18 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
|
||||
}
|
||||
}
|
||||
|
||||
if (u & FLAG_TOKENIZE) == FLAG_TOKENIZE {
|
||||
tokenizeCapableBackends := []string{"llama.cpp", "rwkv"}
|
||||
if !slices.Contains(tokenizeCapableBackends, c.Backend) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if (u & FLAG_VAD) == FLAG_VAD {
|
||||
if c.Backend != "silero-vad" {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -81,10 +81,10 @@ func readMultipleBackendConfigsFromFile(file string, opts ...ConfigLoaderOption)
|
||||
c := &[]*BackendConfig{}
|
||||
f, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||
return nil, fmt.Errorf("readMultipleBackendConfigsFromFile cannot read config file %q: %w", file, err)
|
||||
}
|
||||
if err := yaml.Unmarshal(f, c); err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
|
||||
return nil, fmt.Errorf("readMultipleBackendConfigsFromFile cannot unmarshal config file %q: %w", file, err)
|
||||
}
|
||||
|
||||
for _, cc := range *c {
|
||||
@@ -101,10 +101,10 @@ func readBackendConfigFromFile(file string, opts ...ConfigLoaderOption) (*Backen
|
||||
c := &BackendConfig{}
|
||||
f, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||
return nil, fmt.Errorf("readBackendConfigFromFile cannot read config file %q: %w", file, err)
|
||||
}
|
||||
if err := yaml.Unmarshal(f, c); err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
|
||||
return nil, fmt.Errorf("readBackendConfigFromFile cannot unmarshal config file %q: %w", file, err)
|
||||
}
|
||||
|
||||
c.SetDefaults(opts...)
|
||||
@@ -117,7 +117,9 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
|
||||
// Load a config file if present after the model name
|
||||
cfg := &BackendConfig{
|
||||
PredictionOptions: schema.PredictionOptions{
|
||||
Model: modelName,
|
||||
BasicModelRequest: schema.BasicModelRequest{
|
||||
Model: modelName,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -145,6 +147,15 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func (bcl *BackendConfigLoader) LoadBackendConfigFileByNameDefaultOptions(modelName string, appConfig *ApplicationConfig) (*BackendConfig, error) {
|
||||
return bcl.LoadBackendConfigFileByName(modelName, appConfig.ModelPath,
|
||||
LoadOptionDebug(appConfig.Debug),
|
||||
LoadOptionThreads(appConfig.Threads),
|
||||
LoadOptionContextSize(appConfig.ContextSize),
|
||||
LoadOptionF16(appConfig.F16),
|
||||
ModelPath(appConfig.ModelPath))
|
||||
}
|
||||
|
||||
// This format is currently only used when reading a single file at startup, passed in via ApplicationConfig.ConfigFile
|
||||
func (bcl *BackendConfigLoader) LoadMultipleBackendConfigsSingleFile(file string, opts ...ConfigLoaderOption) error {
|
||||
bcl.Lock()
|
||||
@@ -167,7 +178,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoa
|
||||
defer bcl.Unlock()
|
||||
c, err := readBackendConfigFromFile(file, opts...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read config file: %w", err)
|
||||
return fmt.Errorf("LoadBackendConfig cannot read config file %q: %w", file, err)
|
||||
}
|
||||
|
||||
if c.Validate() {
|
||||
@@ -324,9 +335,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error {
|
||||
func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
|
||||
bcl.Lock()
|
||||
defer bcl.Unlock()
|
||||
|
||||
entries, err := os.ReadDir(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read directory '%s': %w", path, err)
|
||||
return fmt.Errorf("LoadBackendConfigsFromPath cannot read directory '%s': %w", path, err)
|
||||
}
|
||||
files := make([]fs.FileInfo, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
@@ -344,13 +356,13 @@ func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...
|
||||
}
|
||||
c, err := readBackendConfigFromFile(filepath.Join(path, file.Name()), opts...)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msgf("cannot read config file: %s", file.Name())
|
||||
log.Error().Err(err).Str("File Name", file.Name()).Msgf("LoadBackendConfigsFromPath cannot read config file")
|
||||
continue
|
||||
}
|
||||
if c.Validate() {
|
||||
bcl.configs[c.Name] = *c
|
||||
} else {
|
||||
log.Error().Err(err).Msgf("config is not valid")
|
||||
log.Error().Err(err).Str("Name", c.Name).Msgf("config is not valid")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -48,9 +48,9 @@ parameters:
|
||||
Expect(config.Name).To(Equal("bar-baz"))
|
||||
Expect(config.Validate()).To(BeTrue())
|
||||
|
||||
// download https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml
|
||||
// download https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml
|
||||
httpClient := http.Client{}
|
||||
resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml")
|
||||
resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml")
|
||||
Expect(err).To(BeNil())
|
||||
defer resp.Body.Close()
|
||||
tmp, err = os.CreateTemp("", "config.yaml")
|
||||
|
||||
@@ -161,10 +161,11 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
|
||||
}
|
||||
|
||||
// We try to guess only if we don't have a template defined already
|
||||
f, err := gguf.ParseGGUFFile(filepath.Join(modelPath, cfg.ModelFileName()))
|
||||
guessPath := filepath.Join(modelPath, cfg.ModelFileName())
|
||||
f, err := gguf.ParseGGUFFile(guessPath)
|
||||
if err != nil {
|
||||
// Only valid for gguf files
|
||||
log.Debug().Msgf("guessDefaultsFromFile: %s", "not a GGUF file")
|
||||
log.Debug().Str("filePath", guessPath).Msg("guessDefaultsFromFile: not a GGUF file")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -48,8 +48,10 @@ var _ = Describe("Model test", func() {
|
||||
defer os.RemoveAll(tempdir)
|
||||
|
||||
gallery := []GalleryModel{{
|
||||
Name: "bert",
|
||||
URL: bertEmbeddingsURL,
|
||||
Metadata: Metadata{
|
||||
Name: "bert",
|
||||
URL: bertEmbeddingsURL,
|
||||
},
|
||||
}}
|
||||
out, err := yaml.Marshal(gallery)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
@@ -11,6 +11,14 @@ import (
|
||||
// It is used to install the model by resolving the URL and downloading the files.
|
||||
// The other fields are used to override the configuration of the model.
|
||||
type GalleryModel struct {
|
||||
Metadata `json:",inline" yaml:",inline"`
|
||||
// config_file is read in the situation where URL is blank - and therefore this is a base config.
|
||||
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
|
||||
// Overrides are used to override the configuration of the model located at URL
|
||||
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
|
||||
}
|
||||
|
||||
type Metadata struct {
|
||||
URL string `json:"url,omitempty" yaml:"url,omitempty"`
|
||||
Name string `json:"name,omitempty" yaml:"name,omitempty"`
|
||||
Description string `json:"description,omitempty" yaml:"description,omitempty"`
|
||||
@@ -18,10 +26,6 @@ type GalleryModel struct {
|
||||
URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"`
|
||||
Icon string `json:"icon,omitempty" yaml:"icon,omitempty"`
|
||||
Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`
|
||||
// config_file is read in the situation where URL is blank - and therefore this is a base config.
|
||||
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
|
||||
// Overrides are used to override the configuration of the model located at URL
|
||||
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
|
||||
// AdditionalFiles are used to add additional files to the model
|
||||
AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"`
|
||||
// Gallery is a reference to the gallery which contains the model
|
||||
|
||||
@@ -9,7 +9,11 @@ import (
|
||||
var _ = Describe("Gallery API tests", func() {
|
||||
Context("requests", func() {
|
||||
It("parses github with a branch", func() {
|
||||
req := GalleryModel{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"}
|
||||
req := GalleryModel{
|
||||
Metadata: Metadata{
|
||||
URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main",
|
||||
},
|
||||
}
|
||||
e, err := GetGalleryConfigFromURL(req.URL, "")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(e.Name).To(Equal("gpt4all-j"))
|
||||
|
||||
@@ -130,7 +130,6 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
return metricsService.Shutdown()
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
// Health Checks should always be exempt from auth, so register these first
|
||||
routes.HealthRoutes(router)
|
||||
@@ -167,13 +166,15 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
galleryService := services.NewGalleryService(application.ApplicationConfig())
|
||||
galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
|
||||
|
||||
routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
|
||||
routes.RegisterOpenAIRoutes(router, application)
|
||||
requestExtractor := middleware.NewRequestExtractor(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
|
||||
routes.RegisterElevenLabsRoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
routes.RegisterLocalAIRoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
|
||||
routes.RegisterOpenAIRoutes(router, requestExtractor, application)
|
||||
if !application.ApplicationConfig().DisableWebUI {
|
||||
routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
|
||||
}
|
||||
routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
routes.RegisterJINARoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
|
||||
httpFS := http.FS(embedDirStatic)
|
||||
|
||||
|
||||
@@ -299,14 +299,18 @@ var _ = Describe("API test", func() {
|
||||
|
||||
g := []gallery.GalleryModel{
|
||||
{
|
||||
Name: "bert",
|
||||
URL: bertEmbeddingsURL,
|
||||
Metadata: gallery.Metadata{
|
||||
Name: "bert",
|
||||
URL: bertEmbeddingsURL,
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "bert2",
|
||||
URL: bertEmbeddingsURL,
|
||||
Overrides: map[string]interface{}{"foo": "bar"},
|
||||
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
|
||||
Metadata: gallery.Metadata{
|
||||
Name: "bert2",
|
||||
URL: bertEmbeddingsURL,
|
||||
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
|
||||
},
|
||||
Overrides: map[string]interface{}{"foo": "bar"},
|
||||
},
|
||||
}
|
||||
out, err := yaml.Marshal(g)
|
||||
@@ -476,7 +480,7 @@ var _ = Describe("API test", func() {
|
||||
})
|
||||
It("apply models from config", func() {
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
|
||||
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml",
|
||||
})
|
||||
|
||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||
@@ -522,77 +526,6 @@ var _ = Describe("API test", func() {
|
||||
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
|
||||
})
|
||||
|
||||
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("test supported only on linux")
|
||||
}
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
URL: "github:go-skynet/model-gallery/openllama_3b.yaml",
|
||||
Name: "openllama_3b",
|
||||
Overrides: map[string]interface{}{"backend": "llama-ggml", "mmap": true, "f16": true, "context_size": 128},
|
||||
})
|
||||
|
||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||
|
||||
uuid := response["uuid"].(string)
|
||||
|
||||
Eventually(func() bool {
|
||||
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
|
||||
return response["processed"].(bool)
|
||||
}, "360s", "10s").Should(Equal(true))
|
||||
|
||||
By("testing completion")
|
||||
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "openllama_3b", Prompt: "Count up to five: one, two, three, four, "})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Choices)).To(Equal(1))
|
||||
Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
|
||||
|
||||
By("testing functions")
|
||||
resp2, err := client.CreateChatCompletion(
|
||||
context.TODO(),
|
||||
openai.ChatCompletionRequest{
|
||||
Model: "openllama_3b",
|
||||
Messages: []openai.ChatCompletionMessage{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "What is the weather like in San Francisco (celsius)?",
|
||||
},
|
||||
},
|
||||
Functions: []openai.FunctionDefinition{
|
||||
openai.FunctionDefinition{
|
||||
Name: "get_current_weather",
|
||||
Description: "Get the current weather",
|
||||
Parameters: jsonschema.Definition{
|
||||
Type: jsonschema.Object,
|
||||
Properties: map[string]jsonschema.Definition{
|
||||
"location": {
|
||||
Type: jsonschema.String,
|
||||
Description: "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"unit": {
|
||||
Type: jsonschema.String,
|
||||
Enum: []string{"celcius", "fahrenheit"},
|
||||
},
|
||||
},
|
||||
Required: []string{"location"},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp2.Choices)).To(Equal(1))
|
||||
Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
|
||||
Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
|
||||
|
||||
var res map[string]string
|
||||
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
|
||||
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
|
||||
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
|
||||
|
||||
})
|
||||
|
||||
It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("test supported only on linux")
|
||||
@@ -600,7 +533,7 @@ var _ = Describe("API test", func() {
|
||||
|
||||
modelName := "hermes-2-pro-mistral"
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
|
||||
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml",
|
||||
})
|
||||
|
||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
package fiberContext
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// ModelFromContext returns the model from the context
|
||||
// If no model is specified, it will take the first available
|
||||
// Takes a model string as input which should be the one received from the user request.
|
||||
// It returns the model name resolved from the context and an error if any.
|
||||
func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
|
||||
if ctx.Params("model") != "" {
|
||||
modelInput = ctx.Params("model")
|
||||
}
|
||||
if ctx.Query("model") != "" {
|
||||
modelInput = ctx.Query("model")
|
||||
}
|
||||
// Set model from bearer token, if available
|
||||
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // Reduced duplicate characters of Bearer
|
||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||
|
||||
// If no model was specified, take the first available
|
||||
if modelInput == "" && !bearerExists && firstModel {
|
||||
models, _ := services.ListModels(cl, loader, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||
if len(models) > 0 {
|
||||
modelInput = models[0]
|
||||
log.Debug().Msgf("No model specified, using: %s", modelInput)
|
||||
} else {
|
||||
log.Debug().Msgf("No model specified, returning error")
|
||||
return "", fmt.Errorf("no model specified")
|
||||
}
|
||||
}
|
||||
|
||||
// If a model is found in bearer token takes precedence
|
||||
if bearerExists {
|
||||
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
||||
modelInput = bearer
|
||||
}
|
||||
return modelInput, nil
|
||||
}
|
||||
@@ -4,7 +4,7 @@ import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
@@ -17,45 +17,21 @@ import (
|
||||
// @Router /v1/sound-generation [post]
|
||||
func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(schema.ElevenLabsSoundGenerationRequest)
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.ElevenLabsSoundGenerationRequest)
|
||||
if !ok || input.ModelID == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
|
||||
if err != nil {
|
||||
modelFile = input.ModelID
|
||||
log.Warn().Str("ModelID", input.ModelID).Msg("Model not found in context")
|
||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
if err != nil {
|
||||
modelFile = input.ModelID
|
||||
log.Warn().Str("Request ModelID", input.ModelID).Err(err).Msg("error during LoadBackendConfigFileByName, using request ModelID")
|
||||
} else {
|
||||
if input.ModelID != "" {
|
||||
modelFile = input.ModelID
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
}
|
||||
log.Debug().Str("modelFile", "modelFile").Str("backend", cfg.Backend).Msg("Sound Generation Request about to be sent to backend")
|
||||
|
||||
if input.Duration != nil {
|
||||
log.Debug().Float32("duration", *input.Duration).Msg("duration set")
|
||||
}
|
||||
if input.Temperature != nil {
|
||||
log.Debug().Float32("temperature", *input.Temperature).Msg("temperature set")
|
||||
}
|
||||
|
||||
// TODO: Support uploading files?
|
||||
filePath, _, err := backend.SoundGeneration(modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
|
||||
filePath, _, err := backend.SoundGeneration(input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ package elevenlabs
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
@@ -20,39 +20,21 @@ import (
|
||||
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
input := new(schema.ElevenLabsTTSRequest)
|
||||
voiceID := c.Params("voice-id")
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.ElevenLabsTTSRequest)
|
||||
if !ok || input.ModelID == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
|
||||
if err != nil {
|
||||
modelFile = input.ModelID
|
||||
log.Warn().Msgf("Model not found in context: %s", input.ModelID)
|
||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
if err != nil {
|
||||
modelFile = input.ModelID
|
||||
log.Warn().Msgf("Model not found in context: %s", input.ModelID)
|
||||
} else {
|
||||
if input.ModelID != "" {
|
||||
modelFile = input.ModelID
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
}
|
||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||
log.Debug().Str("modelName", input.ModelID).Msg("elevenlabs TTS request recieved")
|
||||
|
||||
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, "", voiceID, ml, appConfig, *cfg)
|
||||
filePath, _, err := backend.ModelTTS(input.Text, voiceID, input.LanguageCode, ml, appConfig, *cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -3,9 +3,9 @@ package jina
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
@@ -19,58 +19,32 @@ import (
|
||||
// @Router /v1/rerank [post]
|
||||
func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
req := new(schema.JINARerankRequest)
|
||||
if err := c.BodyParser(req); err != nil {
|
||||
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
|
||||
"error": "Cannot parse JSON",
|
||||
})
|
||||
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.JINARerankRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
input := new(schema.TTSRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
}
|
||||
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||
|
||||
if input.Backend != "" {
|
||||
cfg.Backend = input.Backend
|
||||
}
|
||||
log.Debug().Str("model", input.Model).Msg("JINA Rerank Request recieved")
|
||||
|
||||
request := &proto.RerankRequest{
|
||||
Query: req.Query,
|
||||
TopN: int32(req.TopN),
|
||||
Documents: req.Documents,
|
||||
Query: input.Query,
|
||||
TopN: int32(input.TopN),
|
||||
Documents: input.Documents,
|
||||
}
|
||||
|
||||
results, err := backend.Rerank(modelFile, request, ml, appConfig, *cfg)
|
||||
results, err := backend.Rerank(request, ml, appConfig, *cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
response := &schema.JINARerankResponse{
|
||||
Model: req.Model,
|
||||
Model: input.Model,
|
||||
}
|
||||
|
||||
for _, r := range results.Results {
|
||||
|
||||
@@ -117,19 +117,25 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
|
||||
// @Router /models/available [get]
|
||||
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
|
||||
|
||||
models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
log.Debug().Msgf("Models found from galleries: %+v", models)
|
||||
for _, m := range models {
|
||||
log.Debug().Msgf("Model found from galleries: %+v", m)
|
||||
|
||||
log.Debug().Msgf("Available %d models from %d galleries\n", len(models), len(mgs.galleries))
|
||||
|
||||
m := []gallery.Metadata{}
|
||||
|
||||
for _, mm := range models {
|
||||
m = append(m, mm.Metadata)
|
||||
}
|
||||
dat, err := json.Marshal(models)
|
||||
|
||||
log.Debug().Msgf("Models %#v", m)
|
||||
|
||||
dat, err := json.Marshal(m)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("could not marshal models: %w", err)
|
||||
}
|
||||
return c.Send(dat)
|
||||
}
|
||||
|
||||
@@ -4,13 +4,15 @@ import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
// TODO: This is not yet in use. Needs middleware rework, since it is not referenced.
|
||||
|
||||
// TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID
|
||||
//
|
||||
// @Summary Get TokenMetrics for Active Slot.
|
||||
@@ -29,18 +31,13 @@ func TokenMetricsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader,
|
||||
return err
|
||||
}
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
||||
if !ok || modelFile != "" {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
}
|
||||
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
cfg, err := cl.LoadBackendConfigFileByNameDefaultOptions(modelFile, appConfig)
|
||||
|
||||
if err != nil {
|
||||
log.Err(err)
|
||||
|
||||
@@ -4,55 +4,32 @@ import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// TokenizeEndpoint exposes a REST API to tokenize the content
|
||||
// @Summary Tokenize the input.
|
||||
// @Param request body schema.TokenizeRequest true "Request"
|
||||
// @Success 200 {object} schema.TokenizeResponse "Response"
|
||||
// @Router /v1/tokenize [post]
|
||||
func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
input := new(schema.TokenizeRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
return func(ctx *fiber.Ctx) error {
|
||||
input, ok := ctx.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.TokenizeRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
cfg, ok := ctx.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
log.Err(err)
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||
|
||||
tokenResponse, err := backend.ModelTokenize(input.Content, ml, *cfg, appConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.JSON(tokenResponse)
|
||||
return nil
|
||||
|
||||
return ctx.JSON(tokenResponse)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ package localai
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
@@ -24,37 +24,24 @@ import (
|
||||
// @Router /tts [post]
|
||||
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(schema.TTSRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.TTSRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
log.Debug().Str("model", input.Model).Msg("LocalAI TTS Request recieved")
|
||||
|
||||
if err != nil {
|
||||
log.Err(err)
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||
|
||||
if input.Backend != "" {
|
||||
cfg.Backend = input.Backend
|
||||
if cfg.Backend == "" {
|
||||
if input.Backend != "" {
|
||||
cfg.Backend = input.Backend
|
||||
} else {
|
||||
cfg.Backend = model.PiperBackend
|
||||
}
|
||||
}
|
||||
|
||||
if input.Language != "" {
|
||||
@@ -65,7 +52,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
|
||||
cfg.Voice = input.Voice
|
||||
}
|
||||
|
||||
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, cfg.Voice, cfg.Language, ml, appConfig, *cfg)
|
||||
filePath, _, err := backend.ModelTTS(input.Input, cfg.Voice, cfg.Language, ml, appConfig, *cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -4,9 +4,8 @@ import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
@@ -19,45 +18,20 @@ import (
|
||||
// @Router /vad [post]
|
||||
func VADEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(schema.VADRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.VADRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
log.Debug().Str("model", input.Model).Msg("LocalAI VAD Request recieved")
|
||||
|
||||
if err != nil {
|
||||
log.Err(err)
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||
resp, err := backend.VAD(input, c.Context(), ml, appConfig, *cfg)
|
||||
|
||||
opts := backend.ModelOptions(*cfg, appConfig, model.WithBackendString(cfg.Backend), model.WithModel(modelFile))
|
||||
|
||||
vadModel, err := ml.Load(opts...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
req := proto.VADRequest{
|
||||
Audio: input.Audio,
|
||||
}
|
||||
resp, err := vadModel.VAD(c.Context(), &req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -5,18 +5,19 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/templates"
|
||||
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/valyala/fasthttp"
|
||||
)
|
||||
@@ -174,26 +175,20 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
textContentToReturn = ""
|
||||
id = uuid.New().String()
|
||||
created = int(time.Now().Unix())
|
||||
// Set CorrelationID
|
||||
correlationID := c.Get("X-Correlation-ID")
|
||||
if len(strings.TrimSpace(correlationID)) == 0 {
|
||||
correlationID = id
|
||||
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
c.Set("X-Correlation-ID", correlationID)
|
||||
|
||||
// Opt-in extra usage flag
|
||||
extraUsage := c.Get("Extra-Usage", "") != ""
|
||||
|
||||
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, startupOptions.Debug, startupOptions.Threads, startupOptions.ContextSize, startupOptions.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
log.Debug().Msgf("Configuration read: %+v", config)
|
||||
log.Debug().Msgf("Chat endpoint configuration read: %+v", config)
|
||||
|
||||
funcs := input.Functions
|
||||
shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions()
|
||||
@@ -401,6 +396,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
log.Debug().Msgf("Text content to return: %s", textContentToReturn)
|
||||
noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0
|
||||
|
||||
finishReason := "stop"
|
||||
if len(input.Tools) > 0 {
|
||||
finishReason = "tool_calls"
|
||||
}
|
||||
|
||||
switch {
|
||||
case noActionsToRun:
|
||||
result, err := handleQuestion(config, input, ml, startupOptions, results, s, predInput)
|
||||
@@ -408,19 +408,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
log.Error().Err(err).Msg("error handling question")
|
||||
return
|
||||
}
|
||||
|
||||
*c = append(*c, schema.Choice{
|
||||
Message: &schema.Message{Role: "assistant", Content: &result}})
|
||||
FinishReason: finishReason,
|
||||
Message: &schema.Message{Role: "assistant", Content: &result}})
|
||||
default:
|
||||
toolChoice := schema.Choice{
|
||||
FinishReason: finishReason,
|
||||
Message: &schema.Message{
|
||||
Role: "assistant",
|
||||
},
|
||||
}
|
||||
|
||||
if len(input.Tools) > 0 {
|
||||
toolChoice.FinishReason = "tool_calls"
|
||||
}
|
||||
|
||||
for _, ss := range results {
|
||||
name, args := ss.Name, ss.Arguments
|
||||
if len(input.Tools) > 0 {
|
||||
@@ -438,7 +437,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
},
|
||||
)
|
||||
} else {
|
||||
// otherwise we return more choices directly
|
||||
// otherwise we return more choices directly (deprecated)
|
||||
*c = append(*c, schema.Choice{
|
||||
FinishReason: "function_call",
|
||||
Message: &schema.Message{
|
||||
@@ -539,7 +538,7 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
|
||||
audios = append(audios, m.StringAudios...)
|
||||
}
|
||||
|
||||
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, *config, o, nil)
|
||||
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, config, o, nil)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("model inference failed")
|
||||
return "", err
|
||||
|
||||
@@ -10,12 +10,13 @@ import (
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/templates"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/valyala/fasthttp"
|
||||
@@ -27,10 +28,9 @@ import (
|
||||
// @Success 200 {object} schema.OpenAIResponse "Response"
|
||||
// @Router /v1/completions [post]
|
||||
func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
|
||||
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
|
||||
process := func(id string, s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
|
||||
ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
|
||||
usage := schema.OpenAIUsage{
|
||||
PromptTokens: tokenUsage.Prompt,
|
||||
@@ -63,22 +63,18 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
// Add Correlation
|
||||
c.Set("X-Correlation-ID", id)
|
||||
|
||||
// Opt-in extra usage flag
|
||||
// Handle Correlation
|
||||
id := c.Get("X-Correlation-ID", uuid.New().String())
|
||||
extraUsage := c.Get("Extra-Usage", "") != ""
|
||||
|
||||
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
log.Debug().Msgf("`input`: %+v", input)
|
||||
|
||||
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
if config.ResponseFormatMap != nil {
|
||||
@@ -122,7 +118,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
|
||||
|
||||
responses := make(chan schema.OpenAIResponse)
|
||||
|
||||
go process(predInput, input, config, ml, responses, extraUsage)
|
||||
go process(id, predInput, input, config, ml, responses, extraUsage)
|
||||
|
||||
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
||||
|
||||
|
||||
@@ -2,16 +2,17 @@ package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/templates"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
@@ -25,20 +26,21 @@ import (
|
||||
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
// Opt-in extra usage flag
|
||||
extraUsage := c.Get("Extra-Usage", "") != ""
|
||||
|
||||
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
log.Debug().Msgf("Edit Endpoint Input : %+v", input)
|
||||
log.Debug().Msgf("Edit Endpoint Config: %+v", *config)
|
||||
|
||||
var result []schema.Choice
|
||||
totalTokenUsage := backend.TokenUsage{}
|
||||
|
||||
@@ -2,11 +2,11 @@ package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
|
||||
"github.com/google/uuid"
|
||||
@@ -23,14 +23,14 @@ import (
|
||||
// @Router /v1/embeddings [post]
|
||||
func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
model, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
config, input, err := mergeRequestWithConfig(model, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
@@ -66,25 +67,23 @@ func downloadFile(url string) (string, error) {
|
||||
// @Router /v1/images/generations [post]
|
||||
func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
m, input, err := readRequest(c, cl, ml, appConfig, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
log.Error().Msg("Image Endpoint - Invalid Input")
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
if m == "" {
|
||||
m = "stablediffusion"
|
||||
}
|
||||
log.Debug().Msgf("Loading model: %+v", m)
|
||||
|
||||
config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, 0, 0, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
log.Error().Msg("Image Endpoint - Invalid Config")
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
src := ""
|
||||
if input.File != "" {
|
||||
|
||||
fileData := []byte{}
|
||||
var err error
|
||||
// check if input.File is an URL, if so download it and save it
|
||||
// to a temporary file
|
||||
if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") {
|
||||
|
||||
@@ -37,7 +37,7 @@ func ComputeChoices(
|
||||
}
|
||||
|
||||
// get the model function to call for the result
|
||||
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, *config, o, tokenCallback)
|
||||
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, config, o, tokenCallback)
|
||||
if err != nil {
|
||||
return result, backend.TokenUsage{}, err
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
@@ -10,6 +9,8 @@ import (
|
||||
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
@@ -25,15 +26,16 @@ import (
|
||||
// @Router /v1/audio/transcriptions [post]
|
||||
func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
m, input, err := readRequest(c, cl, ml, appConfig, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request: %w", err)
|
||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || config == nil {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
// retrieve the file data from the request
|
||||
file, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
|
||||
@@ -1,326 +1,450 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/templates"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type correlationIDKeyType string
|
||||
|
||||
// CorrelationIDKey to track request across process boundary
|
||||
const CorrelationIDKey correlationIDKeyType = "correlationID"
|
||||
|
||||
func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
|
||||
input := new(schema.OpenAIRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return "", nil, fmt.Errorf("failed parsing request body: %w", err)
|
||||
}
|
||||
|
||||
received, _ := json.Marshal(input)
|
||||
// Extract or generate the correlation ID
|
||||
correlationID := c.Get("X-Correlation-ID", uuid.New().String())
|
||||
|
||||
ctx, cancel := context.WithCancel(o.Context)
|
||||
// Add the correlation ID to the new context
|
||||
ctxWithCorrelationID := context.WithValue(ctx, CorrelationIDKey, correlationID)
|
||||
|
||||
input.Context = ctxWithCorrelationID
|
||||
input.Cancel = cancel
|
||||
|
||||
log.Debug().Msgf("Request received: %s", string(received))
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, firstModel)
|
||||
|
||||
return modelFile, input, err
|
||||
}
|
||||
|
||||
func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) {
|
||||
if input.Echo {
|
||||
config.Echo = input.Echo
|
||||
}
|
||||
if input.TopK != nil {
|
||||
config.TopK = input.TopK
|
||||
}
|
||||
if input.TopP != nil {
|
||||
config.TopP = input.TopP
|
||||
}
|
||||
|
||||
if input.Backend != "" {
|
||||
config.Backend = input.Backend
|
||||
}
|
||||
|
||||
if input.ClipSkip != 0 {
|
||||
config.Diffusers.ClipSkip = input.ClipSkip
|
||||
}
|
||||
|
||||
if input.ModelBaseName != "" {
|
||||
config.AutoGPTQ.ModelBaseName = input.ModelBaseName
|
||||
}
|
||||
|
||||
if input.NegativePromptScale != 0 {
|
||||
config.NegativePromptScale = input.NegativePromptScale
|
||||
}
|
||||
|
||||
if input.UseFastTokenizer {
|
||||
config.UseFastTokenizer = input.UseFastTokenizer
|
||||
}
|
||||
|
||||
if input.NegativePrompt != "" {
|
||||
config.NegativePrompt = input.NegativePrompt
|
||||
}
|
||||
|
||||
if input.RopeFreqBase != 0 {
|
||||
config.RopeFreqBase = input.RopeFreqBase
|
||||
}
|
||||
|
||||
if input.RopeFreqScale != 0 {
|
||||
config.RopeFreqScale = input.RopeFreqScale
|
||||
}
|
||||
|
||||
if input.Grammar != "" {
|
||||
config.Grammar = input.Grammar
|
||||
}
|
||||
|
||||
if input.Temperature != nil {
|
||||
config.Temperature = input.Temperature
|
||||
}
|
||||
|
||||
if input.Maxtokens != nil {
|
||||
config.Maxtokens = input.Maxtokens
|
||||
}
|
||||
|
||||
if input.ResponseFormat != nil {
|
||||
switch responseFormat := input.ResponseFormat.(type) {
|
||||
case string:
|
||||
config.ResponseFormat = responseFormat
|
||||
case map[string]interface{}:
|
||||
config.ResponseFormatMap = responseFormat
|
||||
}
|
||||
}
|
||||
|
||||
switch stop := input.Stop.(type) {
|
||||
case string:
|
||||
if stop != "" {
|
||||
config.StopWords = append(config.StopWords, stop)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range stop {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.StopWords = append(config.StopWords, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(input.Tools) > 0 {
|
||||
for _, tool := range input.Tools {
|
||||
input.Functions = append(input.Functions, tool.Function)
|
||||
}
|
||||
}
|
||||
|
||||
if input.ToolsChoice != nil {
|
||||
var toolChoice functions.Tool
|
||||
|
||||
switch content := input.ToolsChoice.(type) {
|
||||
case string:
|
||||
_ = json.Unmarshal([]byte(content), &toolChoice)
|
||||
case map[string]interface{}:
|
||||
dat, _ := json.Marshal(content)
|
||||
_ = json.Unmarshal(dat, &toolChoice)
|
||||
}
|
||||
input.FunctionCall = map[string]interface{}{
|
||||
"name": toolChoice.Function.Name,
|
||||
}
|
||||
}
|
||||
|
||||
// Decode each request's message content
|
||||
imgIndex, vidIndex, audioIndex := 0, 0, 0
|
||||
for i, m := range input.Messages {
|
||||
nrOfImgsInMessage := 0
|
||||
nrOfVideosInMessage := 0
|
||||
nrOfAudiosInMessage := 0
|
||||
|
||||
switch content := m.Content.(type) {
|
||||
case string:
|
||||
input.Messages[i].StringContent = content
|
||||
case []interface{}:
|
||||
dat, _ := json.Marshal(content)
|
||||
c := []schema.Content{}
|
||||
json.Unmarshal(dat, &c)
|
||||
|
||||
textContent := ""
|
||||
// we will template this at the end
|
||||
|
||||
CONTENT:
|
||||
for _, pp := range c {
|
||||
switch pp.Type {
|
||||
case "text":
|
||||
textContent += pp.Text
|
||||
//input.Messages[i].StringContent = pp.Text
|
||||
case "video", "video_url":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed encoding video: %s", err)
|
||||
continue CONTENT
|
||||
}
|
||||
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
|
||||
vidIndex++
|
||||
nrOfVideosInMessage++
|
||||
case "audio_url", "audio":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed encoding image: %s", err)
|
||||
continue CONTENT
|
||||
}
|
||||
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
|
||||
audioIndex++
|
||||
nrOfAudiosInMessage++
|
||||
case "image_url", "image":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed encoding image: %s", err)
|
||||
continue CONTENT
|
||||
}
|
||||
|
||||
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
||||
|
||||
imgIndex++
|
||||
nrOfImgsInMessage++
|
||||
}
|
||||
}
|
||||
|
||||
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(config.TemplateConfig.Multimodal, templates.MultiModalOptions{
|
||||
TotalImages: imgIndex,
|
||||
TotalVideos: vidIndex,
|
||||
TotalAudios: audioIndex,
|
||||
ImagesInMessage: nrOfImgsInMessage,
|
||||
VideosInMessage: nrOfVideosInMessage,
|
||||
AudiosInMessage: nrOfAudiosInMessage,
|
||||
}, textContent)
|
||||
}
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
config.RepeatPenalty = input.RepeatPenalty
|
||||
}
|
||||
|
||||
if input.FrequencyPenalty != 0 {
|
||||
config.FrequencyPenalty = input.FrequencyPenalty
|
||||
}
|
||||
|
||||
if input.PresencePenalty != 0 {
|
||||
config.PresencePenalty = input.PresencePenalty
|
||||
}
|
||||
|
||||
if input.Keep != 0 {
|
||||
config.Keep = input.Keep
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
config.Batch = input.Batch
|
||||
}
|
||||
|
||||
if input.IgnoreEOS {
|
||||
config.IgnoreEOS = input.IgnoreEOS
|
||||
}
|
||||
|
||||
if input.Seed != nil {
|
||||
config.Seed = input.Seed
|
||||
}
|
||||
|
||||
if input.TypicalP != nil {
|
||||
config.TypicalP = input.TypicalP
|
||||
}
|
||||
|
||||
switch inputs := input.Input.(type) {
|
||||
case string:
|
||||
if inputs != "" {
|
||||
config.InputStrings = append(config.InputStrings, inputs)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range inputs {
|
||||
switch i := pp.(type) {
|
||||
case string:
|
||||
config.InputStrings = append(config.InputStrings, i)
|
||||
case []interface{}:
|
||||
tokens := []int{}
|
||||
for _, ii := range i {
|
||||
tokens = append(tokens, int(ii.(float64)))
|
||||
}
|
||||
config.InputToken = append(config.InputToken, tokens)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Can be either a string or an object
|
||||
switch fnc := input.FunctionCall.(type) {
|
||||
case string:
|
||||
if fnc != "" {
|
||||
config.SetFunctionCallString(fnc)
|
||||
}
|
||||
case map[string]interface{}:
|
||||
var name string
|
||||
n, exists := fnc["name"]
|
||||
if exists {
|
||||
nn, e := n.(string)
|
||||
if e {
|
||||
name = nn
|
||||
}
|
||||
}
|
||||
config.SetFunctionCallNameString(name)
|
||||
}
|
||||
|
||||
switch p := input.Prompt.(type) {
|
||||
case string:
|
||||
config.PromptStrings = append(config.PromptStrings, p)
|
||||
case []interface{}:
|
||||
for _, pp := range p {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.PromptStrings = append(config.PromptStrings, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If a quality was defined as number, convert it to step
|
||||
if input.Quality != "" {
|
||||
q, err := strconv.Atoi(input.Quality)
|
||||
if err == nil {
|
||||
config.Step = q
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {
|
||||
cfg, err := cm.LoadBackendConfigFileByName(modelFile, loader.ModelPath,
|
||||
config.LoadOptionDebug(debug),
|
||||
config.LoadOptionThreads(threads),
|
||||
config.LoadOptionContextSize(ctx),
|
||||
config.LoadOptionF16(f16),
|
||||
)
|
||||
|
||||
// Set the parameters for the language model prediction
|
||||
updateRequestConfig(cfg, input)
|
||||
|
||||
if !cfg.Validate() {
|
||||
return nil, nil, fmt.Errorf("failed to validate config")
|
||||
}
|
||||
|
||||
return cfg, input, err
|
||||
}
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/pkg/functions"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/templates"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type correlationIDKeyType string
|
||||
|
||||
// CorrelationIDKey to track request across process boundary
|
||||
const CorrelationIDKey correlationIDKeyType = "correlationID"
|
||||
|
||||
type RequestExtractor struct {
|
||||
backendConfigLoader *config.BackendConfigLoader
|
||||
modelLoader *model.ModelLoader
|
||||
applicationConfig *config.ApplicationConfig
|
||||
}
|
||||
|
||||
func NewRequestExtractor(backendConfigLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, applicationConfig *config.ApplicationConfig) *RequestExtractor {
|
||||
return &RequestExtractor{
|
||||
backendConfigLoader: backendConfigLoader,
|
||||
modelLoader: modelLoader,
|
||||
applicationConfig: applicationConfig,
|
||||
}
|
||||
}
|
||||
|
||||
const CONTEXT_LOCALS_KEY_MODEL_NAME = "MODEL_NAME"
|
||||
const CONTEXT_LOCALS_KEY_LOCALAI_REQUEST = "LOCALAI_REQUEST"
|
||||
const CONTEXT_LOCALS_KEY_MODEL_CONFIG = "MODEL_CONFIG"
|
||||
|
||||
// TODO: Refactor to not return error if unchanged
|
||||
func (re *RequestExtractor) setModelNameFromRequest(ctx *fiber.Ctx) {
|
||||
model, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
||||
if ok && model != "" {
|
||||
return
|
||||
}
|
||||
model = ctx.Params("model")
|
||||
|
||||
if (model == "") && ctx.Query("model") != "" {
|
||||
model = ctx.Query("model")
|
||||
}
|
||||
|
||||
if model == "" {
|
||||
// Set model from bearer token, if available
|
||||
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // "Bearer " => "Bear" to please go-staticcheck. It looks dumb but we might as well take free performance on something called for nearly every request.
|
||||
if bearer != "" {
|
||||
exists, err := services.CheckIfModelExists(re.backendConfigLoader, re.modelLoader, bearer, services.ALWAYS_INCLUDE)
|
||||
if err == nil && exists {
|
||||
model = bearer
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, model)
|
||||
}
|
||||
|
||||
func (re *RequestExtractor) BuildConstantDefaultModelNameMiddleware(defaultModelName string) fiber.Handler {
|
||||
return func(ctx *fiber.Ctx) error {
|
||||
re.setModelNameFromRequest(ctx)
|
||||
localModelName, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
||||
if !ok || localModelName == "" {
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, defaultModelName)
|
||||
log.Debug().Str("defaultModelName", defaultModelName).Msg("context local model name not found, setting to default")
|
||||
}
|
||||
return ctx.Next()
|
||||
}
|
||||
}
|
||||
|
||||
func (re *RequestExtractor) BuildFilteredFirstAvailableDefaultModel(filterFn config.BackendConfigFilterFn) fiber.Handler {
|
||||
return func(ctx *fiber.Ctx) error {
|
||||
re.setModelNameFromRequest(ctx)
|
||||
localModelName := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
||||
if localModelName != "" { // Don't overwrite existing values
|
||||
return ctx.Next()
|
||||
}
|
||||
|
||||
modelNames, err := services.ListModels(re.backendConfigLoader, re.modelLoader, filterFn, services.SKIP_IF_CONFIGURED)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("non-fatal error calling ListModels during SetDefaultModelNameToFirstAvailable()")
|
||||
return ctx.Next()
|
||||
}
|
||||
|
||||
if len(modelNames) == 0 {
|
||||
log.Warn().Msg("SetDefaultModelNameToFirstAvailable used with no matching models installed")
|
||||
// This is non-fatal - making it so was breaking the case of direct installation of raw models
|
||||
// return errors.New("this endpoint requires at least one model to be installed")
|
||||
return ctx.Next()
|
||||
}
|
||||
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, modelNames[0])
|
||||
log.Debug().Str("first model name", modelNames[0]).Msg("context local model name not found, setting to the first model")
|
||||
return ctx.Next()
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: If context and cancel above belong on all methods, move that part of above into here!
|
||||
// Otherwise, it's in its own method below for now
|
||||
func (re *RequestExtractor) SetModelAndConfig(initializer func() schema.LocalAIRequest) fiber.Handler {
|
||||
return func(ctx *fiber.Ctx) error {
|
||||
input := initializer()
|
||||
if input == nil {
|
||||
return fmt.Errorf("unable to initialize body")
|
||||
}
|
||||
if err := ctx.BodyParser(input); err != nil {
|
||||
return fmt.Errorf("failed parsing request body: %w", err)
|
||||
}
|
||||
|
||||
// If this request doesn't have an associated model name, fetch it from earlier in the middleware chain
|
||||
if input.ModelName(nil) == "" {
|
||||
localModelName, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
||||
if ok && localModelName != "" {
|
||||
log.Debug().Str("context localModelName", localModelName).Msg("overriding empty model name in request body with value found earlier in middleware chain")
|
||||
input.ModelName(&localModelName)
|
||||
}
|
||||
}
|
||||
|
||||
cfg, err := re.backendConfigLoader.LoadBackendConfigFileByNameDefaultOptions(input.ModelName(nil), re.applicationConfig)
|
||||
|
||||
if err != nil {
|
||||
log.Err(err)
|
||||
log.Warn().Msgf("Model Configuration File not found for %q", input.ModelName(nil))
|
||||
} else if cfg.Model == "" && input.ModelName(nil) != "" {
|
||||
log.Debug().Str("input.ModelName", input.ModelName(nil)).Msg("config does not include model, using input")
|
||||
cfg.Model = input.ModelName(nil)
|
||||
}
|
||||
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST, input)
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_CONFIG, cfg)
|
||||
|
||||
return ctx.Next()
|
||||
}
|
||||
}
|
||||
|
||||
func (re *RequestExtractor) SetOpenAIRequest(ctx *fiber.Ctx) error {
|
||||
input, ok := ctx.Locals(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
||||
if !ok || input.Model == "" {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
cfg, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
||||
if !ok || cfg == nil {
|
||||
return fiber.ErrBadRequest
|
||||
}
|
||||
|
||||
// Extract or generate the correlation ID
|
||||
correlationID := ctx.Get("X-Correlation-ID", uuid.New().String())
|
||||
ctx.Set("X-Correlation-ID", correlationID)
|
||||
|
||||
c1, cancel := context.WithCancel(re.applicationConfig.Context)
|
||||
// Add the correlation ID to the new context
|
||||
ctxWithCorrelationID := context.WithValue(c1, CorrelationIDKey, correlationID)
|
||||
|
||||
input.Context = ctxWithCorrelationID
|
||||
input.Cancel = cancel
|
||||
|
||||
err := mergeOpenAIRequestAndBackendConfig(cfg, input)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if cfg.Model == "" {
|
||||
log.Debug().Str("input.Model", input.Model).Msg("replacing empty cfg.Model with input value")
|
||||
cfg.Model = input.Model
|
||||
}
|
||||
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST, input)
|
||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_CONFIG, cfg)
|
||||
|
||||
return ctx.Next()
|
||||
}
|
||||
|
||||
func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *schema.OpenAIRequest) error {
|
||||
if input.Echo {
|
||||
config.Echo = input.Echo
|
||||
}
|
||||
if input.TopK != nil {
|
||||
config.TopK = input.TopK
|
||||
}
|
||||
if input.TopP != nil {
|
||||
config.TopP = input.TopP
|
||||
}
|
||||
|
||||
if input.Backend != "" {
|
||||
config.Backend = input.Backend
|
||||
}
|
||||
|
||||
if input.ClipSkip != 0 {
|
||||
config.Diffusers.ClipSkip = input.ClipSkip
|
||||
}
|
||||
|
||||
if input.ModelBaseName != "" {
|
||||
config.AutoGPTQ.ModelBaseName = input.ModelBaseName
|
||||
}
|
||||
|
||||
if input.NegativePromptScale != 0 {
|
||||
config.NegativePromptScale = input.NegativePromptScale
|
||||
}
|
||||
|
||||
if input.UseFastTokenizer {
|
||||
config.UseFastTokenizer = input.UseFastTokenizer
|
||||
}
|
||||
|
||||
if input.NegativePrompt != "" {
|
||||
config.NegativePrompt = input.NegativePrompt
|
||||
}
|
||||
|
||||
if input.RopeFreqBase != 0 {
|
||||
config.RopeFreqBase = input.RopeFreqBase
|
||||
}
|
||||
|
||||
if input.RopeFreqScale != 0 {
|
||||
config.RopeFreqScale = input.RopeFreqScale
|
||||
}
|
||||
|
||||
if input.Grammar != "" {
|
||||
config.Grammar = input.Grammar
|
||||
}
|
||||
|
||||
if input.Temperature != nil {
|
||||
config.Temperature = input.Temperature
|
||||
}
|
||||
|
||||
if input.Maxtokens != nil {
|
||||
config.Maxtokens = input.Maxtokens
|
||||
}
|
||||
|
||||
if input.ResponseFormat != nil {
|
||||
switch responseFormat := input.ResponseFormat.(type) {
|
||||
case string:
|
||||
config.ResponseFormat = responseFormat
|
||||
case map[string]interface{}:
|
||||
config.ResponseFormatMap = responseFormat
|
||||
}
|
||||
}
|
||||
|
||||
switch stop := input.Stop.(type) {
|
||||
case string:
|
||||
if stop != "" {
|
||||
config.StopWords = append(config.StopWords, stop)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range stop {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.StopWords = append(config.StopWords, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(input.Tools) > 0 {
|
||||
for _, tool := range input.Tools {
|
||||
input.Functions = append(input.Functions, tool.Function)
|
||||
}
|
||||
}
|
||||
|
||||
if input.ToolsChoice != nil {
|
||||
var toolChoice functions.Tool
|
||||
|
||||
switch content := input.ToolsChoice.(type) {
|
||||
case string:
|
||||
_ = json.Unmarshal([]byte(content), &toolChoice)
|
||||
case map[string]interface{}:
|
||||
dat, _ := json.Marshal(content)
|
||||
_ = json.Unmarshal(dat, &toolChoice)
|
||||
}
|
||||
input.FunctionCall = map[string]interface{}{
|
||||
"name": toolChoice.Function.Name,
|
||||
}
|
||||
}
|
||||
|
||||
// Decode each request's message content
|
||||
imgIndex, vidIndex, audioIndex := 0, 0, 0
|
||||
for i, m := range input.Messages {
|
||||
nrOfImgsInMessage := 0
|
||||
nrOfVideosInMessage := 0
|
||||
nrOfAudiosInMessage := 0
|
||||
|
||||
switch content := m.Content.(type) {
|
||||
case string:
|
||||
input.Messages[i].StringContent = content
|
||||
case []interface{}:
|
||||
dat, _ := json.Marshal(content)
|
||||
c := []schema.Content{}
|
||||
json.Unmarshal(dat, &c)
|
||||
|
||||
textContent := ""
|
||||
// we will template this at the end
|
||||
|
||||
CONTENT:
|
||||
for _, pp := range c {
|
||||
switch pp.Type {
|
||||
case "text":
|
||||
textContent += pp.Text
|
||||
//input.Messages[i].StringContent = pp.Text
|
||||
case "video", "video_url":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed encoding video: %s", err)
|
||||
continue CONTENT
|
||||
}
|
||||
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
|
||||
vidIndex++
|
||||
nrOfVideosInMessage++
|
||||
case "audio_url", "audio":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed encoding image: %s", err)
|
||||
continue CONTENT
|
||||
}
|
||||
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
|
||||
audioIndex++
|
||||
nrOfAudiosInMessage++
|
||||
case "image_url", "image":
|
||||
// Decode content as base64 either if it's an URL or base64 text
|
||||
base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
|
||||
if err != nil {
|
||||
log.Error().Msgf("Failed encoding image: %s", err)
|
||||
continue CONTENT
|
||||
}
|
||||
|
||||
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
||||
|
||||
imgIndex++
|
||||
nrOfImgsInMessage++
|
||||
}
|
||||
}
|
||||
|
||||
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(config.TemplateConfig.Multimodal, templates.MultiModalOptions{
|
||||
TotalImages: imgIndex,
|
||||
TotalVideos: vidIndex,
|
||||
TotalAudios: audioIndex,
|
||||
ImagesInMessage: nrOfImgsInMessage,
|
||||
VideosInMessage: nrOfVideosInMessage,
|
||||
AudiosInMessage: nrOfAudiosInMessage,
|
||||
}, textContent)
|
||||
}
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
config.RepeatPenalty = input.RepeatPenalty
|
||||
}
|
||||
|
||||
if input.FrequencyPenalty != 0 {
|
||||
config.FrequencyPenalty = input.FrequencyPenalty
|
||||
}
|
||||
|
||||
if input.PresencePenalty != 0 {
|
||||
config.PresencePenalty = input.PresencePenalty
|
||||
}
|
||||
|
||||
if input.Keep != 0 {
|
||||
config.Keep = input.Keep
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
config.Batch = input.Batch
|
||||
}
|
||||
|
||||
if input.IgnoreEOS {
|
||||
config.IgnoreEOS = input.IgnoreEOS
|
||||
}
|
||||
|
||||
if input.Seed != nil {
|
||||
config.Seed = input.Seed
|
||||
}
|
||||
|
||||
if input.TypicalP != nil {
|
||||
config.TypicalP = input.TypicalP
|
||||
}
|
||||
|
||||
log.Debug().Str("input.Input", fmt.Sprintf("%+v", input.Input))
|
||||
|
||||
switch inputs := input.Input.(type) {
|
||||
case string:
|
||||
if inputs != "" {
|
||||
config.InputStrings = append(config.InputStrings, inputs)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range inputs {
|
||||
switch i := pp.(type) {
|
||||
case string:
|
||||
config.InputStrings = append(config.InputStrings, i)
|
||||
case []interface{}:
|
||||
tokens := []int{}
|
||||
for _, ii := range i {
|
||||
tokens = append(tokens, int(ii.(float64)))
|
||||
}
|
||||
config.InputToken = append(config.InputToken, tokens)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Can be either a string or an object
|
||||
switch fnc := input.FunctionCall.(type) {
|
||||
case string:
|
||||
if fnc != "" {
|
||||
config.SetFunctionCallString(fnc)
|
||||
}
|
||||
case map[string]interface{}:
|
||||
var name string
|
||||
n, exists := fnc["name"]
|
||||
if exists {
|
||||
nn, e := n.(string)
|
||||
if e {
|
||||
name = nn
|
||||
}
|
||||
}
|
||||
config.SetFunctionCallNameString(name)
|
||||
}
|
||||
|
||||
switch p := input.Prompt.(type) {
|
||||
case string:
|
||||
config.PromptStrings = append(config.PromptStrings, p)
|
||||
case []interface{}:
|
||||
for _, pp := range p {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.PromptStrings = append(config.PromptStrings, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If a quality was defined as number, convert it to step
|
||||
if input.Quality != "" {
|
||||
q, err := strconv.Atoi(input.Quality)
|
||||
if err == nil {
|
||||
config.Step = q
|
||||
}
|
||||
}
|
||||
|
||||
if config.Validate() {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("unable to validate configuration after merging")
|
||||
}
|
||||
@@ -4,17 +4,26 @@ import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/elevenlabs"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func RegisterElevenLabsRoutes(app *fiber.App,
|
||||
re *middleware.RequestExtractor,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig) {
|
||||
|
||||
// Elevenlabs
|
||||
app.Post("/v1/text-to-speech/:voice-id", elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/text-to-speech/:voice-id",
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.ElevenLabsTTSRequest) }),
|
||||
elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
||||
|
||||
app.Post("/v1/sound-generation", elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/sound-generation",
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_SOUND_GENERATION)),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.ElevenLabsSoundGenerationRequest) }),
|
||||
elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
|
||||
|
||||
}
|
||||
|
||||
@@ -3,16 +3,22 @@ package routes
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/jina"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func RegisterJINARoutes(app *fiber.App,
|
||||
re *middleware.RequestExtractor,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig) {
|
||||
|
||||
// POST endpoint to mimic the reranking
|
||||
app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/rerank",
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_RERANK)),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.JINARerankRequest) }),
|
||||
jina.JINARerankEndpoint(cl, ml, appConfig))
|
||||
}
|
||||
|
||||
@@ -5,13 +5,16 @@ import (
|
||||
"github.com/gofiber/swagger"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/internal"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func RegisterLocalAIRoutes(router *fiber.App,
|
||||
requestExtractor *middleware.RequestExtractor,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
@@ -33,8 +36,18 @@ func RegisterLocalAIRoutes(router *fiber.App,
|
||||
router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
|
||||
}
|
||||
|
||||
router.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
|
||||
router.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))
|
||||
router.Post("/tts",
|
||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
|
||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TTSRequest) }),
|
||||
localai.TTSEndpoint(cl, ml, appConfig))
|
||||
|
||||
vadChain := []fiber.Handler{
|
||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_VAD)),
|
||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.VADRequest) }),
|
||||
localai.VADEndpoint(cl, ml, appConfig),
|
||||
}
|
||||
router.Post("/vad", vadChain...)
|
||||
router.Post("/v1/vad", vadChain...)
|
||||
|
||||
// Stores
|
||||
sl := model.NewModelLoader("")
|
||||
@@ -47,10 +60,14 @@ func RegisterLocalAIRoutes(router *fiber.App,
|
||||
router.Get("/metrics", localai.LocalAIMetricsEndpoint())
|
||||
}
|
||||
|
||||
// Experimental Backend Statistics Module
|
||||
// Backend Statistics Module
|
||||
// TODO: Should these use standard middlewares? Refactor later, they are extremely simple.
|
||||
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
|
||||
router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
|
||||
router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
|
||||
// The v1/* urls are exactly the same as above - makes local e2e testing easier if they are registered.
|
||||
router.Get("/v1/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
|
||||
router.Post("/v1/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
|
||||
|
||||
// p2p
|
||||
if p2p.IsP2PEnabled() {
|
||||
@@ -67,6 +84,9 @@ func RegisterLocalAIRoutes(router *fiber.App,
|
||||
router.Get("/system", localai.SystemInformations(ml, appConfig))
|
||||
|
||||
// misc
|
||||
router.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
|
||||
router.Post("/v1/tokenize",
|
||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TOKENIZE)),
|
||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }),
|
||||
localai.TokenizeEndpoint(cl, ml, appConfig))
|
||||
|
||||
}
|
||||
|
||||
@@ -3,51 +3,50 @@ package routes
|
||||
import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/application"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/openai"
|
||||
"github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
)
|
||||
|
||||
func RegisterOpenAIRoutes(app *fiber.App,
|
||||
re *middleware.RequestExtractor,
|
||||
application *application.Application) {
|
||||
// openAI compatible API endpoint
|
||||
|
||||
// chat
|
||||
app.Post("/v1/chat/completions",
|
||||
openai.ChatEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
app.Post("/chat/completions",
|
||||
openai.ChatEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
chatChain := []fiber.Handler{
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
re.SetOpenAIRequest,
|
||||
openai.ChatEndpoint(application.BackendLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()),
|
||||
}
|
||||
app.Post("/v1/chat/completions", chatChain...)
|
||||
app.Post("/chat/completions", chatChain...)
|
||||
|
||||
// edit
|
||||
app.Post("/v1/edits",
|
||||
openai.EditEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
editChain := []fiber.Handler{
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EDIT)),
|
||||
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
re.SetOpenAIRequest,
|
||||
openai.EditEndpoint(application.BackendLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()),
|
||||
}
|
||||
app.Post("/v1/edits", editChain...)
|
||||
app.Post("/edits", editChain...)
|
||||
|
||||
app.Post("/edits",
|
||||
openai.EditEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
// completion
|
||||
completionChain := []fiber.Handler{
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_COMPLETION)),
|
||||
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
re.SetOpenAIRequest,
|
||||
openai.CompletionEndpoint(application.BackendLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()),
|
||||
}
|
||||
app.Post("/v1/completions", completionChain...)
|
||||
app.Post("/completions", completionChain...)
|
||||
app.Post("/v1/engines/:model/completions", completionChain...)
|
||||
|
||||
// assistant
|
||||
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
@@ -81,45 +80,37 @@ func RegisterOpenAIRoutes(app *fiber.App,
|
||||
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||
|
||||
// completion
|
||||
app.Post("/v1/completions",
|
||||
openai.CompletionEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
app.Post("/completions",
|
||||
openai.CompletionEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
app.Post("/v1/engines/:model/completions",
|
||||
openai.CompletionEndpoint(
|
||||
application.BackendLoader(),
|
||||
application.ModelLoader(),
|
||||
application.TemplatesEvaluator(),
|
||||
application.ApplicationConfig(),
|
||||
),
|
||||
)
|
||||
|
||||
// embeddings
|
||||
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
embeddingChain := []fiber.Handler{
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EMBEDDINGS)),
|
||||
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
re.SetOpenAIRequest,
|
||||
openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()),
|
||||
}
|
||||
app.Post("/v1/embeddings", embeddingChain...)
|
||||
app.Post("/embeddings", embeddingChain...)
|
||||
app.Post("/v1/engines/:model/embeddings", embeddingChain...)
|
||||
|
||||
// audio
|
||||
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/v1/audio/speech", localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/v1/audio/transcriptions",
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT)),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
re.SetOpenAIRequest,
|
||||
openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()),
|
||||
)
|
||||
|
||||
app.Post("/v1/audio/speech",
|
||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TTSRequest) }),
|
||||
localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
|
||||
// images
|
||||
app.Post("/v1/images/generations", openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Post("/v1/images/generations",
|
||||
re.BuildConstantDefaultModelNameMiddleware("stablediffusion"),
|
||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
||||
re.SetOpenAIRequest,
|
||||
openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
|
||||
if application.ApplicationConfig().ImageDir != "" {
|
||||
app.Static("/generated-images", application.ApplicationConfig().ImageDir)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
<footer class="text-center py-8">
|
||||
LocalAI Version {{.Version}}<br>
|
||||
<a href='https://github.com/mudler/LocalAI' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2024 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
|
||||
<a href='https://github.com/mudler/LocalAI' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2025 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
|
||||
</footer>
|
||||
<script src="static/assets/tw-elements.js"></script>
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user