mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-25 01:02:05 -04:00
Compare commits
1 Commits
v2.26.0
...
speculativ
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b16a01d0bd |
23
.bruno/LocalAI Test Requests/Sound Generation/musicgen.bru
Normal file
23
.bruno/LocalAI Test Requests/Sound Generation/musicgen.bru
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
meta {
|
||||||
|
name: musicgen
|
||||||
|
type: http
|
||||||
|
seq: 1
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/sound-generation
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model_id": "facebook/musicgen-small",
|
||||||
|
"text": "Exciting 80s Newscast Interstitial",
|
||||||
|
"duration_seconds": 8
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
meta {
|
||||||
|
name: backend monitor
|
||||||
|
type: http
|
||||||
|
seq: 4
|
||||||
|
}
|
||||||
|
|
||||||
|
get {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/monitor
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
meta {
|
||||||
|
name: backend-shutdown
|
||||||
|
type: http
|
||||||
|
seq: 3
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/shutdown
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}"
|
||||||
|
}
|
||||||
|
}
|
||||||
5
.bruno/LocalAI Test Requests/bruno.json
Normal file
5
.bruno/LocalAI Test Requests/bruno.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"version": "1",
|
||||||
|
"name": "LocalAI Test Requests",
|
||||||
|
"type": "collection"
|
||||||
|
}
|
||||||
6
.bruno/LocalAI Test Requests/environments/localhost.bru
Normal file
6
.bruno/LocalAI Test Requests/environments/localhost.bru
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
vars {
|
||||||
|
HOST: localhost
|
||||||
|
PORT: 8080
|
||||||
|
DEFAULT_MODEL: gpt-3.5-turbo
|
||||||
|
PROTOCOL: http://
|
||||||
|
}
|
||||||
11
.bruno/LocalAI Test Requests/get models list.bru
Normal file
11
.bruno/LocalAI Test Requests/get models list.bru
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
meta {
|
||||||
|
name: get models list
|
||||||
|
type: http
|
||||||
|
seq: 2
|
||||||
|
}
|
||||||
|
|
||||||
|
get {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models
|
||||||
|
body: none
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
meta {
|
||||||
|
name: Generate image
|
||||||
|
type: http
|
||||||
|
seq: 1
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/images/generations
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"prompt": "<positive prompt>|<negative prompt>",
|
||||||
|
"model": "model-name",
|
||||||
|
"step": 51,
|
||||||
|
"size": "1024x1024",
|
||||||
|
"image": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
24
.bruno/LocalAI Test Requests/llm text/-completions.bru
Normal file
24
.bruno/LocalAI Test Requests/llm text/-completions.bru
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
meta {
|
||||||
|
name: -completions
|
||||||
|
type: http
|
||||||
|
seq: 4
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"prompt": "function downloadFile(string url, string outputPath) {",
|
||||||
|
"max_tokens": 256,
|
||||||
|
"temperature": 0.5
|
||||||
|
}
|
||||||
|
}
|
||||||
23
.bruno/LocalAI Test Requests/llm text/-edits.bru
Normal file
23
.bruno/LocalAI Test Requests/llm text/-edits.bru
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
meta {
|
||||||
|
name: -edits
|
||||||
|
type: http
|
||||||
|
seq: 5
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/edits
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"input": "What day of the wek is it?",
|
||||||
|
"instruction": "Fix the spelling mistakes"
|
||||||
|
}
|
||||||
|
}
|
||||||
22
.bruno/LocalAI Test Requests/llm text/-embeddings.bru
Normal file
22
.bruno/LocalAI Test Requests/llm text/-embeddings.bru
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
meta {
|
||||||
|
name: -embeddings
|
||||||
|
type: http
|
||||||
|
seq: 6
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/embeddings
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
meta {
|
||||||
|
name: chat completion -simple- 1 message-
|
||||||
|
type: http
|
||||||
|
seq: 4
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "How could one use friction to cook an egg?"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 256,
|
||||||
|
"temperature": 0.2,
|
||||||
|
"grammar": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
meta {
|
||||||
|
name: chat-completions -long-
|
||||||
|
type: http
|
||||||
|
seq: 5
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"messages": [{"role": "system", "content": "You are a helpful, intelligent pirate assistant. Always answer as helpfully as possible, while being a pirate and using pirate language. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."},
|
||||||
|
{"role": "user", "content": "How could one use electricity to cook an egg?"},
|
||||||
|
{"role": "assistant",
|
||||||
|
"content": "Shiver me timbers! Using electricity to cook an egg? Well, matey, I reckon that be a right curious idea! *adjusts spectacles* Now, I ain't sure if this be possible, but I'll do me best to help ye out.\n\nFirst things first, ye gotta understand that electricity be a powerful force, and it can't just cook an egg on its own. Ye see, electricity be like a mighty wind that can make things happen, but it needs somethin' to work with. So, if ye want to use electricity to cook an egg, ye gotta find a way to harness that power and make it do the cookin'.\n\nNow, I know what ye might be thinkin': \"How do I harness the power of electricity to cook an egg?\" Well, matey, there be a few ways to do it. One way be to use a special device called an \"electric frying pan.\" This be a pan that has a built-in heating element that gets hot when ye plug it into a wall socket. When the element gets hot, ye can crack an egg into the pan and watch as it cook"
|
||||||
|
},
|
||||||
|
{"role": "user", "content": "I don't have one of those, just a raw wire and plenty of power! How do we get it done?"}],
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"temperature": 0.5
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
meta {
|
||||||
|
name: chat-completions -stream-
|
||||||
|
type: http
|
||||||
|
seq: 6
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"messages": [{"role": "user", "content": "Explain how I can set sail on the ocean using only power generated by seagulls?"}],
|
||||||
|
"max_tokens": 256,
|
||||||
|
"temperature": 0.9,
|
||||||
|
"stream": true
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
meta {
|
||||||
|
name: add model gallery
|
||||||
|
type: http
|
||||||
|
seq: 10
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"url": "file:///home/dave/projects/model-gallery/huggingface/TheBloke__CodeLlama-7B-Instruct-GGML.yaml",
|
||||||
|
"name": "test"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
meta {
|
||||||
|
name: delete model gallery
|
||||||
|
type: http
|
||||||
|
seq: 11
|
||||||
|
}
|
||||||
|
|
||||||
|
delete {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"name": "test"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
meta {
|
||||||
|
name: list MODELS in galleries
|
||||||
|
type: http
|
||||||
|
seq: 7
|
||||||
|
}
|
||||||
|
|
||||||
|
get {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/available
|
||||||
|
body: none
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
meta {
|
||||||
|
name: list model GALLERIES
|
||||||
|
type: http
|
||||||
|
seq: 8
|
||||||
|
}
|
||||||
|
|
||||||
|
get {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||||
|
body: none
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
11
.bruno/LocalAI Test Requests/model gallery/model delete.bru
Normal file
11
.bruno/LocalAI Test Requests/model gallery/model delete.bru
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
meta {
|
||||||
|
name: model delete
|
||||||
|
type: http
|
||||||
|
seq: 7
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||||
|
body: none
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
meta {
|
||||||
|
name: model gallery apply -gist-
|
||||||
|
type: http
|
||||||
|
seq: 12
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"id": "TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q2_K.bin"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
meta {
|
||||||
|
name: model gallery apply
|
||||||
|
type: http
|
||||||
|
seq: 9
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"id": "dave@TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q3_K_S.bin",
|
||||||
|
"name": "codellama7b"
|
||||||
|
}
|
||||||
|
}
|
||||||
BIN
.bruno/LocalAI Test Requests/transcription/gb1.ogg
Normal file
BIN
.bruno/LocalAI Test Requests/transcription/gb1.ogg
Normal file
Binary file not shown.
16
.bruno/LocalAI Test Requests/transcription/transcribe.bru
Normal file
16
.bruno/LocalAI Test Requests/transcription/transcribe.bru
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
meta {
|
||||||
|
name: transcribe
|
||||||
|
type: http
|
||||||
|
seq: 1
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
|
||||||
|
body: multipartForm
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
body:multipart-form {
|
||||||
|
file: @file(transcription/gb1.ogg)
|
||||||
|
model: whisper-1
|
||||||
|
}
|
||||||
22
.bruno/LocalAI Test Requests/tts/-tts.bru
Normal file
22
.bruno/LocalAI Test Requests/tts/-tts.bru
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
meta {
|
||||||
|
name: -tts
|
||||||
|
type: http
|
||||||
|
seq: 2
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||||
|
}
|
||||||
|
}
|
||||||
23
.bruno/LocalAI Test Requests/tts/musicgen.bru
Normal file
23
.bruno/LocalAI Test Requests/tts/musicgen.bru
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
meta {
|
||||||
|
name: musicgen
|
||||||
|
type: http
|
||||||
|
seq: 2
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"backend": "transformers",
|
||||||
|
"model": "facebook/musicgen-small",
|
||||||
|
"input": "80s Synths playing Jazz"
|
||||||
|
}
|
||||||
|
}
|
||||||
2
.github/labeler.yml
vendored
2
.github/labeler.yml
vendored
@@ -1,4 +1,4 @@
|
|||||||
enhancement:
|
enhancements:
|
||||||
- head-branch: ['^feature', 'feature']
|
- head-branch: ['^feature', 'feature']
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
|
|||||||
2
.github/workflows/dependabot_auto.yml
vendored
2
.github/workflows/dependabot_auto.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Dependabot metadata
|
- name: Dependabot metadata
|
||||||
id: metadata
|
id: metadata
|
||||||
uses: dependabot/fetch-metadata@v2.3.0
|
uses: dependabot/fetch-metadata@v2.2.0
|
||||||
with:
|
with:
|
||||||
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||||
skip-commit-verification: true
|
skip-commit-verification: true
|
||||||
|
|||||||
9
.github/workflows/generate_grpc_cache.yaml
vendored
9
.github/workflows/generate_grpc_cache.yaml
vendored
@@ -2,10 +2,9 @@ name: 'generate and publish GRPC docker caches'
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
push:
|
||||||
schedule:
|
branches:
|
||||||
# daily at midnight
|
- master
|
||||||
- cron: '0 0 * * *'
|
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||||
@@ -17,7 +16,7 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- grpc-base-image: ubuntu:22.04
|
- grpc-base-image: ubuntu:22.04
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'ubuntu-latest'
|
||||||
platforms: 'linux/amd64,linux/arm64'
|
platforms: 'linux/amd64,linux/arm64'
|
||||||
runs-on: ${{matrix.runs-on}}
|
runs-on: ${{matrix.runs-on}}
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
4
.github/workflows/notify-models.yaml
vendored
4
.github/workflows/notify-models.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
# Check the PR diff using the current branch and the base branch of the PR
|
||||||
- uses: GrantBirki/git-diff-action@v2.8.0
|
- uses: GrantBirki/git-diff-action@v2.7.0
|
||||||
id: git-diff-action
|
id: git-diff-action
|
||||||
with:
|
with:
|
||||||
json_diff_file_output: diff.json
|
json_diff_file_output: diff.json
|
||||||
@@ -99,7 +99,7 @@ jobs:
|
|||||||
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
||||||
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
# Check the PR diff using the current branch and the base branch of the PR
|
||||||
- uses: GrantBirki/git-diff-action@v2.8.0
|
- uses: GrantBirki/git-diff-action@v2.7.0
|
||||||
id: git-diff-action
|
id: git-diff-action
|
||||||
with:
|
with:
|
||||||
json_diff_file_output: diff.json
|
json_diff_file_output: diff.json
|
||||||
|
|||||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2023-2025 Ettore Di Giacinto (mudler@localai.io)
|
Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io)
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
|||||||
44
Makefile
44
Makefile
@@ -6,7 +6,9 @@ BINARY_NAME=local-ai
|
|||||||
DETECT_LIBS?=true
|
DETECT_LIBS?=true
|
||||||
|
|
||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
CPPLLAMA_VERSION?=300907b2110cc17b4337334dc397e05de2d8f5e0
|
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||||
|
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
|
CPPLLAMA_VERSION?=6152129d05870cb38162c422c6ba80434e021e9f
|
||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||||
@@ -22,7 +24,7 @@ BARKCPP_VERSION?=v1.0.0
|
|||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
# stablediffusion.cpp (ggml)
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||||
STABLEDIFFUSION_GGML_VERSION?=d46ed5e184b97c2018dc2e8105925bdb8775e02c
|
STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024
|
||||||
|
|
||||||
ONNX_VERSION?=1.20.0
|
ONNX_VERSION?=1.20.0
|
||||||
ONNX_ARCH?=x64
|
ONNX_ARCH?=x64
|
||||||
@@ -149,6 +151,7 @@ ifeq ($(BUILD_TYPE),hipblas)
|
|||||||
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
|
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
|
||||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||||
|
# llama-ggml has no hipblas support, so override it here.
|
||||||
export STABLE_BUILD_TYPE=
|
export STABLE_BUILD_TYPE=
|
||||||
export GGML_HIP=1
|
export GGML_HIP=1
|
||||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
||||||
@@ -185,6 +188,7 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
|||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||||
@@ -218,6 +222,19 @@ endif
|
|||||||
|
|
||||||
all: help
|
all: help
|
||||||
|
|
||||||
|
## go-llama.cpp
|
||||||
|
sources/go-llama.cpp:
|
||||||
|
mkdir -p sources/go-llama.cpp
|
||||||
|
cd sources/go-llama.cpp && \
|
||||||
|
git init && \
|
||||||
|
git remote add origin $(GOLLAMA_REPO) && \
|
||||||
|
git fetch origin && \
|
||||||
|
git checkout $(GOLLAMA_VERSION) && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
|
||||||
|
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||||
|
|
||||||
## bark.cpp
|
## bark.cpp
|
||||||
sources/bark.cpp:
|
sources/bark.cpp:
|
||||||
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
||||||
@@ -293,17 +310,19 @@ sources/whisper.cpp:
|
|||||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||||
|
|
||||||
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||||
|
|
||||||
replace:
|
replace:
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||||
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||||
|
|
||||||
dropreplace:
|
dropreplace:
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||||
|
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
||||||
|
|
||||||
prepare-sources: get-sources replace
|
prepare-sources: get-sources replace
|
||||||
$(GOCMD) mod download
|
$(GOCMD) mod download
|
||||||
@@ -311,6 +330,7 @@ prepare-sources: get-sources replace
|
|||||||
## GENERIC
|
## GENERIC
|
||||||
rebuild: ## Rebuilds the project
|
rebuild: ## Rebuilds the project
|
||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
|
$(MAKE) -C sources/go-llama.cpp clean
|
||||||
$(MAKE) -C sources/whisper.cpp clean
|
$(MAKE) -C sources/whisper.cpp clean
|
||||||
$(MAKE) -C sources/go-piper clean
|
$(MAKE) -C sources/go-piper clean
|
||||||
$(MAKE) build
|
$(MAKE) build
|
||||||
@@ -414,7 +434,7 @@ run: prepare ## run local-ai
|
|||||||
test-models/testmodel.ggml:
|
test-models/testmodel.ggml:
|
||||||
mkdir test-models
|
mkdir test-models
|
||||||
mkdir test-dir
|
mkdir test-dir
|
||||||
wget -q https://huggingface.co/RichardErkhov/Qwen_-_Qwen2-1.5B-Instruct-gguf/resolve/main/Qwen2-1.5B-Instruct.Q2_K.gguf -O test-models/testmodel.ggml
|
wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
|
||||||
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||||
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||||
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||||
@@ -429,7 +449,8 @@ test: prepare test-models/testmodel.ggml grpcs
|
|||||||
export GO_TAGS="tts debug"
|
export GO_TAGS="tts debug"
|
||||||
$(MAKE) prepare-test
|
$(MAKE) prepare-test
|
||||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||||
|
$(MAKE) test-llama
|
||||||
$(MAKE) test-llama-gguf
|
$(MAKE) test-llama-gguf
|
||||||
$(MAKE) test-tts
|
$(MAKE) test-tts
|
||||||
$(MAKE) test-stablediffusion
|
$(MAKE) test-stablediffusion
|
||||||
@@ -458,6 +479,10 @@ teardown-e2e:
|
|||||||
rm -rf $(TEST_DIR) || true
|
rm -rf $(TEST_DIR) || true
|
||||||
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||||
|
|
||||||
|
test-llama: prepare-test
|
||||||
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||||
|
|
||||||
test-llama-gguf: prepare-test
|
test-llama-gguf: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||||
@@ -735,6 +760,13 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
|||||||
mkdir -p backend-assets/util/
|
mkdir -p backend-assets/util/
|
||||||
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
||||||
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
||||||
|
ifneq ($(UPX),)
|
||||||
|
$(UPX) backend-assets/grpc/llama-ggml
|
||||||
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
|
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
|
||||||
@@ -829,7 +861,7 @@ swagger:
|
|||||||
|
|
||||||
.PHONY: gen-assets
|
.PHONY: gen-assets
|
||||||
gen-assets:
|
gen-assets:
|
||||||
$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets
|
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
docs/layouts/_default:
|
docs/layouts/_default:
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
backend: silero-vad
|
|
||||||
name: silero-vad
|
|
||||||
parameters:
|
|
||||||
model: silero-vad.onnx
|
|
||||||
download_files:
|
|
||||||
- filename: silero-vad.onnx
|
|
||||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
|
||||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
|
||||||
@@ -129,7 +129,7 @@ detect_gpu
|
|||||||
detect_gpu_size
|
detect_gpu_size
|
||||||
|
|
||||||
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
|
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
|
||||||
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"
|
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
|
||||||
|
|
||||||
check_vars
|
check_vars
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
backend: silero-vad
|
|
||||||
name: silero-vad
|
|
||||||
parameters:
|
|
||||||
model: silero-vad.onnx
|
|
||||||
download_files:
|
|
||||||
- filename: silero-vad.onnx
|
|
||||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
|
||||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
backend: silero-vad
|
|
||||||
name: silero-vad
|
|
||||||
parameters:
|
|
||||||
model: silero-vad.onnx
|
|
||||||
download_files:
|
|
||||||
- filename: silero-vad.onnx
|
|
||||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
|
||||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
|
||||||
@@ -163,11 +163,6 @@ message Reply {
|
|||||||
double timing_token_generation = 5;
|
double timing_token_generation = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
message GrammarTrigger {
|
|
||||||
string word = 1;
|
|
||||||
bool at_start = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message ModelOptions {
|
message ModelOptions {
|
||||||
string Model = 1;
|
string Model = 1;
|
||||||
int32 ContextSize = 2;
|
int32 ContextSize = 2;
|
||||||
@@ -252,8 +247,6 @@ message ModelOptions {
|
|||||||
|
|
||||||
string CacheTypeKey = 63;
|
string CacheTypeKey = 63;
|
||||||
string CacheTypeValue = 64;
|
string CacheTypeValue = 64;
|
||||||
|
|
||||||
repeated GrammarTrigger GrammarTriggers = 65;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message Result {
|
message Result {
|
||||||
|
|||||||
@@ -22,6 +22,7 @@
|
|||||||
#include "backend.grpc.pb.h"
|
#include "backend.grpc.pb.h"
|
||||||
#include "utils.hpp"
|
#include "utils.hpp"
|
||||||
#include "sampling.h"
|
#include "sampling.h"
|
||||||
|
#include "speculative.h"
|
||||||
// include std::regex
|
// include std::regex
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
@@ -185,12 +186,45 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<com
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct llama_slot_params {
|
||||||
|
uint32_t seed = -1; // RNG seed
|
||||||
|
bool stream = true;
|
||||||
|
bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt
|
||||||
|
bool return_tokens = false;
|
||||||
|
|
||||||
|
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||||
|
int32_t n_discard = 0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
|
||||||
|
int32_t n_predict = -1; // new tokens to predict
|
||||||
|
int32_t n_indent = 0; // mininum line indentation for the generated text in number of whitespace characters
|
||||||
|
|
||||||
|
int64_t t_max_prompt_ms = -1; // TODO: implement
|
||||||
|
int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit
|
||||||
|
|
||||||
|
std::vector<common_adapter_lora_info> lora;
|
||||||
|
|
||||||
|
std::vector<std::string> antiprompt;
|
||||||
|
std::vector<std::string> response_fields;
|
||||||
|
bool timings_per_token = false;
|
||||||
|
bool post_sampling_probs = false;
|
||||||
|
bool ignore_eos = false;
|
||||||
|
|
||||||
|
json input_prefix;
|
||||||
|
json input_suffix;
|
||||||
|
|
||||||
|
struct common_params_sampling sampling;
|
||||||
|
struct common_params_speculative speculative;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
struct llama_client_slot
|
struct llama_client_slot
|
||||||
{
|
{
|
||||||
int id;
|
int id;
|
||||||
int task_id = -1;
|
int task_id = -1;
|
||||||
|
|
||||||
struct slot_params params;
|
struct llama_slot_params params;
|
||||||
|
common_speculative * spec = nullptr;
|
||||||
|
llama_batch batch_spec = {};
|
||||||
|
|
||||||
|
|
||||||
slot_state state = IDLE;
|
slot_state state = IDLE;
|
||||||
slot_command command = NONE;
|
slot_command command = NONE;
|
||||||
@@ -283,6 +317,7 @@ struct llama_client_slot
|
|||||||
images.clear();
|
images.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool has_budget(common_params &global_params) {
|
bool has_budget(common_params &global_params) {
|
||||||
if (params.n_predict == -1 && global_params.n_predict == -1)
|
if (params.n_predict == -1 && global_params.n_predict == -1)
|
||||||
{
|
{
|
||||||
@@ -454,6 +489,10 @@ struct llama_server_context
|
|||||||
{
|
{
|
||||||
llama_model *model = nullptr;
|
llama_model *model = nullptr;
|
||||||
llama_context *ctx = nullptr;
|
llama_context *ctx = nullptr;
|
||||||
|
common_init_result llama_init_dft;
|
||||||
|
llama_context * ctx_dft = nullptr;
|
||||||
|
llama_model * model_dft = nullptr;
|
||||||
|
llama_context_params cparams_dft;
|
||||||
const llama_vocab * vocab = nullptr;
|
const llama_vocab * vocab = nullptr;
|
||||||
|
|
||||||
clip_ctx *clp_ctx = nullptr;
|
clip_ctx *clp_ctx = nullptr;
|
||||||
@@ -468,9 +507,6 @@ struct llama_server_context
|
|||||||
bool add_bos_token = true;
|
bool add_bos_token = true;
|
||||||
bool has_eos_token = true;
|
bool has_eos_token = true;
|
||||||
|
|
||||||
bool grammar_lazy = false;
|
|
||||||
std::vector<common_grammar_trigger> grammar_trigger_words;
|
|
||||||
|
|
||||||
int32_t n_ctx; // total context for all clients / slots
|
int32_t n_ctx; // total context for all clients / slots
|
||||||
|
|
||||||
// system prompt
|
// system prompt
|
||||||
@@ -505,6 +541,7 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool load_model(const common_params ¶ms_)
|
bool load_model(const common_params ¶ms_)
|
||||||
{
|
{
|
||||||
params = params_;
|
params = params_;
|
||||||
@@ -548,6 +585,45 @@ struct llama_server_context
|
|||||||
add_bos_token = llama_vocab_get_add_bos(vocab);
|
add_bos_token = llama_vocab_get_add_bos(vocab);
|
||||||
has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
|
has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
|
||||||
|
|
||||||
|
if (!params.speculative.model.empty()) {
|
||||||
|
LOG("loading draft model '%s'\n", params.speculative.model.c_str());
|
||||||
|
|
||||||
|
auto params_dft = params;
|
||||||
|
|
||||||
|
params_dft.devices = params.speculative.devices;
|
||||||
|
params_dft.model = params.speculative.model;
|
||||||
|
params_dft.n_ctx = params.speculative.n_ctx == 0 ? params.n_ctx / params.n_parallel : params.speculative.n_ctx;
|
||||||
|
params_dft.n_gpu_layers = params.speculative.n_gpu_layers;
|
||||||
|
params_dft.n_parallel = 1;
|
||||||
|
|
||||||
|
llama_init_dft = common_init_from_params(params_dft);
|
||||||
|
|
||||||
|
model_dft = llama_init_dft.model.get();
|
||||||
|
|
||||||
|
if (model_dft == nullptr) {
|
||||||
|
LOG("failed to load draft model, '%s'\n", params.speculative.model.c_str());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!common_speculative_are_compatible(ctx, llama_init_dft.context.get())) {
|
||||||
|
LOG("the draft model '%s' is not compatible with the target model '%s'\n", params.speculative.model.c_str(), params.model.c_str());
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int n_ctx_dft = llama_n_ctx(llama_init_dft.context.get());
|
||||||
|
|
||||||
|
cparams_dft = common_context_params_to_llama(params_dft);
|
||||||
|
cparams_dft.n_batch = n_ctx_dft;
|
||||||
|
|
||||||
|
// force F16 KV cache for the draft model for extra performance
|
||||||
|
cparams_dft.type_k = GGML_TYPE_F16;
|
||||||
|
cparams_dft.type_v = GGML_TYPE_F16;
|
||||||
|
|
||||||
|
// the context is not needed - we will create one for each slot
|
||||||
|
llama_init_dft.context.reset();
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -576,6 +652,22 @@ struct llama_server_context
|
|||||||
slot.n_ctx = n_ctx_slot;
|
slot.n_ctx = n_ctx_slot;
|
||||||
slot.n_predict = params.n_predict;
|
slot.n_predict = params.n_predict;
|
||||||
|
|
||||||
|
if (model_dft) {
|
||||||
|
slot.batch_spec = llama_batch_init(params.speculative.n_max + 1, 0, 1);
|
||||||
|
|
||||||
|
ctx_dft = llama_init_from_model(model_dft, cparams_dft);
|
||||||
|
if (ctx_dft == nullptr) {
|
||||||
|
LOG("%s", "failed to create draft context\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
slot.spec = common_speculative_init(ctx_dft);
|
||||||
|
if (slot.spec == nullptr) {
|
||||||
|
LOG("%s", "failed to create speculator\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
LOG_INFO("new slot", {
|
LOG_INFO("new slot", {
|
||||||
{"slot_id", slot.id},
|
{"slot_id", slot.id},
|
||||||
{"n_ctx_slot", slot.n_ctx}
|
{"n_ctx_slot", slot.n_ctx}
|
||||||
@@ -684,9 +776,11 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
||||||
slot_params default_params;
|
llama_slot_params default_params;
|
||||||
common_params_sampling default_sparams;
|
common_params_sampling default_sparams;
|
||||||
|
|
||||||
|
default_sparams.speculative = params_base.speculative;
|
||||||
|
|
||||||
slot->params.stream = json_value(data, "stream", false);
|
slot->params.stream = json_value(data, "stream", false);
|
||||||
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
||||||
slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
|
slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
|
||||||
@@ -709,8 +803,15 @@ struct llama_server_context
|
|||||||
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||||
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||||
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||||
slot->sparams.grammar_trigger_words = grammar_trigger_words;
|
|
||||||
slot->sparams.grammar_lazy = grammar_lazy;
|
|
||||||
|
slot->sparams.speculative.n_min = json_value(data, "speculative.n_min", defaults.speculative.n_min);
|
||||||
|
slot->sparams.speculative.n_max = json_value(data, "speculative.n_max", defaults.speculative.n_max);
|
||||||
|
slot->sparams.speculative.p_min = json_value(data, "speculative.p_min", defaults.speculative.p_min);
|
||||||
|
|
||||||
|
slot->sparams.speculative.n_min = std::min(params.speculative.n_max, params.speculative.n_min);
|
||||||
|
slot->sparams.speculative.n_min = std::max(params.speculative.n_min, 2);
|
||||||
|
slot->sparams.speculative.n_max = std::max(params.speculative.n_max, 0);
|
||||||
|
|
||||||
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
|
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
|
||||||
// Might be better to reject the request with a 400 ?
|
// Might be better to reject the request with a 400 ?
|
||||||
@@ -1155,14 +1256,6 @@ struct llama_server_context
|
|||||||
slot.has_next_token = false;
|
slot.has_next_token = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (slot.n_past >= slot.n_ctx) {
|
|
||||||
slot.truncated = true;
|
|
||||||
slot.stopped_limit = true;
|
|
||||||
slot.has_next_token = false;
|
|
||||||
|
|
||||||
LOG_VERBOSE("stopped due to running out of context capacity", {});
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok))
|
if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok))
|
||||||
{
|
{
|
||||||
slot.stopped_eos = true;
|
slot.stopped_eos = true;
|
||||||
@@ -1635,17 +1728,17 @@ struct llama_server_context
|
|||||||
{
|
{
|
||||||
if (slot.is_processing() && system_tokens.size() + slot.cache_tokens.size() >= (size_t) slot.n_ctx)
|
if (slot.is_processing() && system_tokens.size() + slot.cache_tokens.size() >= (size_t) slot.n_ctx)
|
||||||
{
|
{
|
||||||
// this check is redundant (for good)
|
|
||||||
// we should never get here, because generation should already stopped in process_token()
|
|
||||||
|
|
||||||
// START LOCALAI changes
|
// START LOCALAI changes
|
||||||
// Temporary disable context-shifting as it can lead to infinite loops (issue: https://github.com/ggerganov/llama.cpp/issues/3969)
|
// Temporary disable context-shifting as it can lead to infinite loops (issue: https://github.com/ggerganov/llama.cpp/issues/3969)
|
||||||
// See: https://github.com/mudler/LocalAI/issues/1333
|
// See: https://github.com/mudler/LocalAI/issues/1333
|
||||||
// Context is exhausted, release the slot
|
// Context is exhausted, release the slot
|
||||||
slot.release();
|
slot.release();
|
||||||
send_final_response(slot);
|
send_final_response(slot);
|
||||||
slot.has_next_token = false;
|
slot.cache_tokens.clear();
|
||||||
LOG_ERROR("context is exhausted, release the slot", {});
|
slot.n_past = 0;
|
||||||
|
slot.truncated = false;
|
||||||
|
slot.has_next_token = true;
|
||||||
|
LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
// END LOCALAI changes
|
// END LOCALAI changes
|
||||||
@@ -2037,6 +2130,97 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// do speculative decoding
|
||||||
|
for (auto & slot : slots) {
|
||||||
|
if (!slot.is_processing() || !(ctx_dft && params.speculative.n_max > 0)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (slot.state != PROCESSING) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// determine the max draft that fits the current slot state
|
||||||
|
int n_draft_max = slot.params.speculative.n_max;
|
||||||
|
|
||||||
|
// note: n_past is not yet increased for the `id` token sampled above
|
||||||
|
// also, need to leave space for 1 extra token to allow context shifts
|
||||||
|
n_draft_max = std::min(n_draft_max, slot.n_ctx - slot.n_past - 2);
|
||||||
|
|
||||||
|
if (slot.n_remaining > 0) {
|
||||||
|
n_draft_max = std::min(n_draft_max, slot.n_remaining - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG("max possible draft: %d\n", n_draft_max);
|
||||||
|
|
||||||
|
if (n_draft_max < slot.params.speculative.n_min) {
|
||||||
|
LOG("the max possible draft is too small: %d < %d - skipping speculative decoding\n", n_draft_max, slot.params.speculative.n_min);
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
llama_token id = slot.sampled;
|
||||||
|
|
||||||
|
struct common_speculative_params params_spec;
|
||||||
|
params_spec.n_draft = n_draft_max;
|
||||||
|
params_spec.n_reuse = llama_n_ctx(ctx_dft) - slot.params.speculative.n_max;
|
||||||
|
params_spec.p_min = slot.params.speculative.p_min;
|
||||||
|
|
||||||
|
llama_tokens draft = common_speculative_gen_draft(slot.spec, params_spec, slot.cache_tokens, id);
|
||||||
|
|
||||||
|
// ignore small drafts
|
||||||
|
if (slot.params.speculative.n_min > (int) draft.size()) {
|
||||||
|
LOG("ignoring small draft: %d < %d\n", (int) draft.size(), slot.params.speculative.n_min);
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// construct the speculation batch
|
||||||
|
common_batch_clear(slot.batch_spec);
|
||||||
|
common_batch_add (slot.batch_spec, id, slot.n_past, { slot.id }, true);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < draft.size(); ++i) {
|
||||||
|
common_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id }, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG("decoding speculative batch, size = %d\n", slot.batch_spec.n_tokens);
|
||||||
|
|
||||||
|
llama_decode(ctx, slot.batch_spec);
|
||||||
|
|
||||||
|
// the accepted tokens from the speculation
|
||||||
|
const auto ids = common_sampler_sample_and_accept_n(slot.ctx_sampling, ctx, draft);
|
||||||
|
|
||||||
|
slot.n_past += ids.size();
|
||||||
|
slot.n_decoded += ids.size();
|
||||||
|
|
||||||
|
slot.cache_tokens.push_back(id);
|
||||||
|
slot.cache_tokens.insert(slot.cache_tokens.end(), ids.begin(), ids.end() - 1);
|
||||||
|
|
||||||
|
llama_kv_cache_seq_rm(ctx, slot.id, slot.n_past, -1);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < ids.size(); ++i) {
|
||||||
|
completion_token_output result;
|
||||||
|
|
||||||
|
result.tok = ids[i];
|
||||||
|
result.text_to_send = common_token_to_piece(ctx, result.tok, params.special);
|
||||||
|
//result.prob = 1.0f; // set later
|
||||||
|
|
||||||
|
// TODO: set result.probs
|
||||||
|
|
||||||
|
if (!process_token(result, slot)) {
|
||||||
|
// release slot because of stop condition
|
||||||
|
slot.release();
|
||||||
|
slot.print_timings();
|
||||||
|
send_final_response(slot);
|
||||||
|
metrics.on_prediction(slot);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG("accepted %d/%d draft tokens, new n_past = %d\n", (int) ids.size() - 1, (int) draft.size(), slot.n_past);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
LOG_VERBOSE("slots updated", {});
|
LOG_VERBOSE("slots updated", {});
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -2309,6 +2493,30 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
params.cpuparams.n_threads = request->threads();
|
params.cpuparams.n_threads = request->threads();
|
||||||
params.n_gpu_layers = request->ngpulayers();
|
params.n_gpu_layers = request->ngpulayers();
|
||||||
params.n_batch = request->nbatch();
|
params.n_batch = request->nbatch();
|
||||||
|
params.speculative.model = request->draftmodel();
|
||||||
|
|
||||||
|
// If options is not NULL, parse options
|
||||||
|
for (int i = 0; request->options()[i] != NULL; i++) {
|
||||||
|
char *optname = strtok(request->options()[i], ":");
|
||||||
|
char *optval = strtok(NULL, ":");
|
||||||
|
if (optval == NULL) {
|
||||||
|
optval = "true";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!strcmp(optname, "speculative.n_gpu_layers")) {
|
||||||
|
params.speculative.n_gpu_layers = std::stoi(optval);
|
||||||
|
}
|
||||||
|
if (!strcmp(optname, "speculative.n_ctx")) {
|
||||||
|
params.speculative.n_ctx = std::stoi(optval);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if params.speculative.n_gpu_layers == 0 {
|
||||||
|
params.speculative.n_gpu_layers = params.n_gpu_layers;
|
||||||
|
}
|
||||||
|
if params.speculative.n_ctx == 0 {
|
||||||
|
params.speculative.n_ctx = params.n_ctx;
|
||||||
|
}
|
||||||
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
|
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
|
||||||
//params.n_parallel = 1;
|
//params.n_parallel = 1;
|
||||||
const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");
|
const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");
|
||||||
@@ -2387,21 +2595,6 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
if ( request->ropefreqscale() != 0.0f ) {
|
if ( request->ropefreqscale() != 0.0f ) {
|
||||||
params.rope_freq_scale = request->ropefreqscale();
|
params.rope_freq_scale = request->ropefreqscale();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (request->grammartriggers_size() > 0) {
|
|
||||||
LOG_INFO("configuring grammar triggers", {});
|
|
||||||
llama.grammar_lazy = true;
|
|
||||||
for (int i = 0; i < request->grammartriggers_size(); i++) {
|
|
||||||
common_grammar_trigger trigger;
|
|
||||||
trigger.word = request->grammartriggers(i).word();
|
|
||||||
trigger.at_start = request->grammartriggers(i).at_start();
|
|
||||||
llama.grammar_trigger_words.push_back(trigger);
|
|
||||||
LOG_INFO("grammar trigger", {
|
|
||||||
{ "word", trigger.word },
|
|
||||||
{ "at_start", trigger.at_start }
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -2550,18 +2743,6 @@ public:
|
|||||||
return grpc::Status::OK;
|
return grpc::Status::OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){
|
|
||||||
json data = parse_options(false, request, llama);
|
|
||||||
|
|
||||||
std::vector<llama_token> tokens = llama.tokenize(data["prompt"],false);
|
|
||||||
|
|
||||||
for (int i=0 ; i< tokens.size(); i++){
|
|
||||||
response->add_tokens(tokens[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return grpc::Status::OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
|
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
|
||||||
llama_client_slot* active_slot = llama.get_active_slot();
|
llama_client_slot* active_slot = llama.get_active_slot();
|
||||||
|
|
||||||
|
|||||||
204
backend/go/llm/llama-ggml/llama.go
Normal file
204
backend/go/llm/llama-ggml/llama.go
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// This is a wrapper to statisfy the GRPC service interface
|
||||||
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/go-skynet/go-llama.cpp"
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
type LLM struct {
|
||||||
|
base.SingleThread
|
||||||
|
|
||||||
|
llama *llama.LLama
|
||||||
|
}
|
||||||
|
|
||||||
|
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||||
|
ropeFreqBase := float32(10000)
|
||||||
|
ropeFreqScale := float32(1)
|
||||||
|
|
||||||
|
if opts.RopeFreqBase != 0 {
|
||||||
|
ropeFreqBase = opts.RopeFreqBase
|
||||||
|
}
|
||||||
|
if opts.RopeFreqScale != 0 {
|
||||||
|
ropeFreqScale = opts.RopeFreqScale
|
||||||
|
}
|
||||||
|
|
||||||
|
llamaOpts := []llama.ModelOption{
|
||||||
|
llama.WithRopeFreqBase(ropeFreqBase),
|
||||||
|
llama.WithRopeFreqScale(ropeFreqScale),
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.NGQA != 0 {
|
||||||
|
llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.RMSNormEps != 0 {
|
||||||
|
llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.ContextSize != 0 {
|
||||||
|
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
|
||||||
|
}
|
||||||
|
if opts.F16Memory {
|
||||||
|
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
|
||||||
|
}
|
||||||
|
if opts.Embeddings {
|
||||||
|
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
|
||||||
|
}
|
||||||
|
if opts.NGPULayers != 0 {
|
||||||
|
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
|
||||||
|
}
|
||||||
|
|
||||||
|
llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap))
|
||||||
|
llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU))
|
||||||
|
llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit))
|
||||||
|
if opts.NBatch != 0 {
|
||||||
|
llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch)))
|
||||||
|
} else {
|
||||||
|
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.NUMA {
|
||||||
|
llamaOpts = append(llamaOpts, llama.EnableNUMA)
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.LowVRAM {
|
||||||
|
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
|
||||||
|
}
|
||||||
|
|
||||||
|
model, err := llama.New(opts.ModelFile, llamaOpts...)
|
||||||
|
llm.llama = model
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
|
||||||
|
ropeFreqBase := float32(10000)
|
||||||
|
ropeFreqScale := float32(1)
|
||||||
|
|
||||||
|
if opts.RopeFreqBase != 0 {
|
||||||
|
ropeFreqBase = opts.RopeFreqBase
|
||||||
|
}
|
||||||
|
if opts.RopeFreqScale != 0 {
|
||||||
|
ropeFreqScale = opts.RopeFreqScale
|
||||||
|
}
|
||||||
|
predictOptions := []llama.PredictOption{
|
||||||
|
llama.SetTemperature(opts.Temperature),
|
||||||
|
llama.SetTopP(opts.TopP),
|
||||||
|
llama.SetTopK(int(opts.TopK)),
|
||||||
|
llama.SetTokens(int(opts.Tokens)),
|
||||||
|
llama.SetThreads(int(opts.Threads)),
|
||||||
|
llama.WithGrammar(opts.Grammar),
|
||||||
|
llama.SetRopeFreqBase(ropeFreqBase),
|
||||||
|
llama.SetRopeFreqScale(ropeFreqScale),
|
||||||
|
llama.SetNegativePromptScale(opts.NegativePromptScale),
|
||||||
|
llama.SetNegativePrompt(opts.NegativePrompt),
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.PromptCacheAll {
|
||||||
|
predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.PromptCacheRO {
|
||||||
|
predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expected absolute path
|
||||||
|
if opts.PromptCachePath != "" {
|
||||||
|
predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.Mirostat != 0 {
|
||||||
|
predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat)))
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.MirostatETA != 0 {
|
||||||
|
predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.MirostatTAU != 0 {
|
||||||
|
predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.Debug {
|
||||||
|
predictOptions = append(predictOptions, llama.Debug)
|
||||||
|
}
|
||||||
|
|
||||||
|
predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))
|
||||||
|
|
||||||
|
if opts.PresencePenalty != 0 {
|
||||||
|
predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.NKeep != 0 {
|
||||||
|
predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep)))
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.Batch != 0 {
|
||||||
|
predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch)))
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.F16KV {
|
||||||
|
predictOptions = append(predictOptions, llama.EnableF16KV)
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.IgnoreEOS {
|
||||||
|
predictOptions = append(predictOptions, llama.IgnoreEOS)
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.Seed != 0 {
|
||||||
|
predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed)))
|
||||||
|
}
|
||||||
|
|
||||||
|
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
|
||||||
|
|
||||||
|
predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
|
||||||
|
predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
|
||||||
|
predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
|
||||||
|
predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
|
||||||
|
predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
|
||||||
|
predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
|
||||||
|
predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
|
||||||
|
return predictOptions
|
||||||
|
}
|
||||||
|
|
||||||
|
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||||
|
return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||||
|
predictOptions := buildPredictOptions(opts)
|
||||||
|
|
||||||
|
predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
|
||||||
|
results <- token
|
||||||
|
return true
|
||||||
|
}))
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
_, err := llm.llama.Predict(opts.Prompt, predictOptions...)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("err: ", err)
|
||||||
|
}
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||||
|
predictOptions := buildPredictOptions(opts)
|
||||||
|
|
||||||
|
if len(opts.EmbeddingTokens) > 0 {
|
||||||
|
tokens := []int{}
|
||||||
|
for _, t := range opts.EmbeddingTokens {
|
||||||
|
tokens = append(tokens, int(t))
|
||||||
|
}
|
||||||
|
return llm.llama.TokenEmbeddings(tokens, predictOptions...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
|
||||||
|
}
|
||||||
19
backend/go/llm/llama-ggml/main.go
Normal file
19
backend/go/llm/llama-ggml/main.go
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
|
||||||
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
accelerate
|
accelerate
|
||||||
auto-gptq==0.7.1
|
auto-gptq==0.7.1
|
||||||
grpcio==1.70.0
|
grpcio==1.69.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
transformers
|
transformers
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
bark==0.1.5
|
bark==0.1.5
|
||||||
grpcio==1.70.0
|
grpcio==1.69.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.70.0
|
grpcio==1.69.0
|
||||||
protobuf
|
protobuf
|
||||||
grpcio-tools
|
grpcio-tools
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.70.0
|
grpcio==1.69.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
packaging==24.1
|
packaging==24.1
|
||||||
@@ -159,18 +159,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
torchType = torch.float16
|
torchType = torch.float16
|
||||||
variant = "fp16"
|
variant = "fp16"
|
||||||
|
|
||||||
options = request.Options
|
|
||||||
|
|
||||||
# empty dict
|
|
||||||
self.options = {}
|
|
||||||
|
|
||||||
# The options are a list of strings in this form optname:optvalue
|
|
||||||
# We are storing all the options in a dict so we can use it later when
|
|
||||||
# generating the images
|
|
||||||
for opt in options:
|
|
||||||
key, value = opt.split(":")
|
|
||||||
self.options[key] = value
|
|
||||||
|
|
||||||
local = False
|
local = False
|
||||||
modelFile = request.Model
|
modelFile = request.Model
|
||||||
|
|
||||||
@@ -453,9 +441,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
|
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
|
||||||
kwargs = {key: options.get(key) for key in keys if key in options}
|
kwargs = {key: options.get(key) for key in keys if key in options}
|
||||||
|
|
||||||
# populate kwargs from self.options.
|
|
||||||
kwargs.update(self.options)
|
|
||||||
|
|
||||||
# Set seed
|
# Set seed
|
||||||
if request.seed > 0:
|
if request.seed > 0:
|
||||||
kwargs["generator"] = torch.Generator(device=self.device).manual_seed(
|
kwargs["generator"] = torch.Generator(device=self.device).manual_seed(
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
setuptools
|
setuptools
|
||||||
grpcio==1.70.0
|
grpcio==1.69.0
|
||||||
pillow
|
pillow
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.70.0
|
grpcio==1.69.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
wheel
|
wheel
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.70.0
|
grpcio==1.69.0
|
||||||
protobuf
|
protobuf
|
||||||
grpcio-tools
|
grpcio-tools
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.70.0
|
grpcio==1.69.0
|
||||||
protobuf
|
protobuf
|
||||||
phonemizer
|
phonemizer
|
||||||
scipy
|
scipy
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.70.0
|
grpcio==1.69.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -5,4 +5,4 @@ accelerate
|
|||||||
transformers
|
transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
outetts
|
outetts
|
||||||
sentence-transformers==3.4.1
|
sentence-transformers==3.3.1
|
||||||
@@ -6,4 +6,4 @@ accelerate
|
|||||||
transformers
|
transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
outetts
|
outetts
|
||||||
sentence-transformers==3.4.1
|
sentence-transformers==3.3.1
|
||||||
|
|||||||
@@ -5,4 +5,4 @@ numba==0.60.0
|
|||||||
transformers
|
transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
outetts
|
outetts
|
||||||
sentence-transformers==3.4.1
|
sentence-transformers==3.3.1
|
||||||
|
|||||||
@@ -7,4 +7,4 @@ numba==0.60.0
|
|||||||
bitsandbytes
|
bitsandbytes
|
||||||
outetts
|
outetts
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
sentence-transformers==3.4.1
|
sentence-transformers==3.3.1
|
||||||
|
|||||||
@@ -8,4 +8,4 @@ numba==0.60.0
|
|||||||
intel-extension-for-transformers
|
intel-extension-for-transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
outetts
|
outetts
|
||||||
sentence-transformers==3.4.1
|
sentence-transformers==3.3.1
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.70.0
|
grpcio==1.69.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools
|
setuptools
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.70.0
|
grpcio==1.69.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools
|
setuptools
|
||||||
@@ -62,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
|
if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
|
||||||
log.Error().Err(err).Msg("error installing models")
|
log.Error().Err(err).Msg("error installing models")
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -145,7 +145,13 @@ func New(opts ...config.AppOption) (*Application, error) {
|
|||||||
|
|
||||||
if options.LoadToMemory != nil {
|
if options.LoadToMemory != nil {
|
||||||
for _, m := range options.LoadToMemory {
|
for _, m := range options.LoadToMemory {
|
||||||
cfg, err := application.BackendLoader().LoadBackendConfigFileByNameDefaultOptions(m, options)
|
cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath,
|
||||||
|
config.LoadOptionDebug(options.Debug),
|
||||||
|
config.LoadOptionThreads(options.Threads),
|
||||||
|
config.LoadOptionContextSize(options.ContextSize),
|
||||||
|
config.LoadOptionF16(options.F16),
|
||||||
|
config.ModelPath(options.ModelPath),
|
||||||
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ type TokenUsage struct {
|
|||||||
TimingTokenGeneration float64
|
TimingTokenGeneration float64
|
||||||
}
|
}
|
||||||
|
|
||||||
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||||
modelFile := c.Model
|
modelFile := c.Model
|
||||||
|
|
||||||
// Check if the modelFile exists, if it doesn't try to load it from the gallery
|
// Check if the modelFile exists, if it doesn't try to load it from the gallery
|
||||||
@@ -48,7 +48,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
opts := ModelOptions(*c, o)
|
opts := ModelOptions(c, o)
|
||||||
inferenceModel, err := loader.Load(opts...)
|
inferenceModel, err := loader.Load(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -84,7 +84,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
|||||||
|
|
||||||
// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
|
// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
|
||||||
fn := func() (LLMResponse, error) {
|
fn := func() (LLMResponse, error) {
|
||||||
opts := gRPCPredictOpts(*c, loader.ModelPath)
|
opts := gRPCPredictOpts(c, loader.ModelPath)
|
||||||
opts.Prompt = s
|
opts.Prompt = s
|
||||||
opts.Messages = protoMessages
|
opts.Messages = protoMessages
|
||||||
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
|
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
|
||||||
|
|||||||
@@ -118,19 +118,9 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
|||||||
nGPULayers = *c.NGPULayers
|
nGPULayers = *c.NGPULayers
|
||||||
}
|
}
|
||||||
|
|
||||||
triggers := make([]*pb.GrammarTrigger, 0)
|
|
||||||
for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers {
|
|
||||||
triggers = append(triggers, &pb.GrammarTrigger{
|
|
||||||
Word: t.Word,
|
|
||||||
AtStart: t.AtStart,
|
|
||||||
})
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
return &pb.ModelOptions{
|
return &pb.ModelOptions{
|
||||||
CUDA: c.CUDA || c.Diffusers.CUDA,
|
CUDA: c.CUDA || c.Diffusers.CUDA,
|
||||||
SchedulerType: c.Diffusers.SchedulerType,
|
SchedulerType: c.Diffusers.SchedulerType,
|
||||||
GrammarTriggers: triggers,
|
|
||||||
PipelineType: c.Diffusers.PipelineType,
|
PipelineType: c.Diffusers.PipelineType,
|
||||||
CFGScale: c.CFGScale,
|
CFGScale: c.CFGScale,
|
||||||
LoraAdapter: c.LoraAdapter,
|
LoraAdapter: c.LoraAdapter,
|
||||||
|
|||||||
@@ -9,10 +9,10 @@ import (
|
|||||||
model "github.com/mudler/LocalAI/pkg/model"
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
||||||
opts := ModelOptions(backendConfig, appConfig)
|
|
||||||
rerankModel, err := loader.Load(opts...)
|
|
||||||
|
|
||||||
|
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
||||||
|
rerankModel, err := loader.Load(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func SoundGeneration(
|
func SoundGeneration(
|
||||||
|
modelFile string,
|
||||||
text string,
|
text string,
|
||||||
duration *float32,
|
duration *float32,
|
||||||
temperature *float32,
|
temperature *float32,
|
||||||
@@ -24,9 +25,8 @@ func SoundGeneration(
|
|||||||
backendConfig config.BackendConfig,
|
backendConfig config.BackendConfig,
|
||||||
) (string, *proto.Result, error) {
|
) (string, *proto.Result, error) {
|
||||||
|
|
||||||
opts := ModelOptions(backendConfig, appConfig)
|
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
||||||
soundGenModel, err := loader.Load(opts...)
|
soundGenModel, err := loader.Load(opts...)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", nil, err
|
return "", nil, err
|
||||||
}
|
}
|
||||||
@@ -44,7 +44,7 @@ func SoundGeneration(
|
|||||||
|
|
||||||
res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
|
res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
|
||||||
Text: text,
|
Text: text,
|
||||||
Model: backendConfig.Model,
|
Model: modelFile,
|
||||||
Dst: filePath,
|
Dst: filePath,
|
||||||
Sample: doSample,
|
Sample: doSample,
|
||||||
Duration: duration,
|
Duration: duration,
|
||||||
|
|||||||
@@ -4,17 +4,24 @@ import (
|
|||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/pkg/grpc"
|
"github.com/mudler/LocalAI/pkg/grpc"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
|
func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
|
||||||
|
|
||||||
|
modelFile := backendConfig.Model
|
||||||
|
|
||||||
var inferenceModel grpc.Backend
|
var inferenceModel grpc.Backend
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
opts := ModelOptions(backendConfig, appConfig)
|
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
||||||
inferenceModel, err = loader.Load(opts...)
|
|
||||||
|
|
||||||
|
if backendConfig.Backend == "" {
|
||||||
|
inferenceModel, err = loader.Load(opts...)
|
||||||
|
} else {
|
||||||
|
opts = append(opts, model.WithBackendString(backendConfig.Backend))
|
||||||
|
inferenceModel, err = loader.Load(opts...)
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return schema.TokenizeResponse{}, err
|
return schema.TokenizeResponse{}, err
|
||||||
}
|
}
|
||||||
@@ -28,10 +35,6 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
|
|||||||
return schema.TokenizeResponse{}, err
|
return schema.TokenizeResponse{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp.Tokens == nil {
|
|
||||||
resp.Tokens = make([]int32, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
return schema.TokenizeResponse{
|
return schema.TokenizeResponse{
|
||||||
Tokens: resp.Tokens,
|
Tokens: resp.Tokens,
|
||||||
}, nil
|
}, nil
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
|
|||||||
tks = append(tks, int(t))
|
tks = append(tks, int(t))
|
||||||
}
|
}
|
||||||
tr.Segments = append(tr.Segments,
|
tr.Segments = append(tr.Segments,
|
||||||
schema.TranscriptionSegment{
|
schema.Segment{
|
||||||
Text: s.Text,
|
Text: s.Text,
|
||||||
Id: int(s.Id),
|
Id: int(s.Id),
|
||||||
Start: time.Duration(s.Start),
|
Start: time.Duration(s.Start),
|
||||||
|
|||||||
@@ -14,22 +14,28 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func ModelTTS(
|
func ModelTTS(
|
||||||
|
backend,
|
||||||
text,
|
text,
|
||||||
|
modelFile,
|
||||||
voice,
|
voice,
|
||||||
language string,
|
language string,
|
||||||
loader *model.ModelLoader,
|
loader *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig,
|
||||||
backendConfig config.BackendConfig,
|
backendConfig config.BackendConfig,
|
||||||
) (string, *proto.Result, error) {
|
) (string, *proto.Result, error) {
|
||||||
opts := ModelOptions(backendConfig, appConfig, model.WithDefaultBackendString(model.PiperBackend))
|
bb := backend
|
||||||
ttsModel, err := loader.Load(opts...)
|
if bb == "" {
|
||||||
|
bb = model.PiperBackend
|
||||||
|
}
|
||||||
|
|
||||||
|
opts := ModelOptions(backendConfig, appConfig, model.WithBackendString(bb), model.WithModel(modelFile))
|
||||||
|
ttsModel, err := loader.Load(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", nil, err
|
return "", nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if ttsModel == nil {
|
if ttsModel == nil {
|
||||||
return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
|
return "", nil, fmt.Errorf("could not load piper model")
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
|
if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
|
||||||
@@ -39,21 +45,22 @@ func ModelTTS(
|
|||||||
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
|
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
|
||||||
filePath := filepath.Join(appConfig.AudioDir, fileName)
|
filePath := filepath.Join(appConfig.AudioDir, fileName)
|
||||||
|
|
||||||
// We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect.
|
// If the model file is not empty, we pass it joined with the model path
|
||||||
// This should be addressed in a follow up PR soon.
|
|
||||||
// Copying it over nearly verbatim, as TTS backends are not functional without this.
|
|
||||||
modelPath := ""
|
modelPath := ""
|
||||||
// Checking first that it exists and is not outside ModelPath
|
if modelFile != "" {
|
||||||
// TODO: we should actually first check if the modelFile is looking like
|
// If the model file is not empty, we pass it joined with the model path
|
||||||
// a FS path
|
// Checking first that it exists and is not outside ModelPath
|
||||||
mp := filepath.Join(loader.ModelPath, backendConfig.Model)
|
// TODO: we should actually first check if the modelFile is looking like
|
||||||
if _, err := os.Stat(mp); err == nil {
|
// a FS path
|
||||||
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
|
mp := filepath.Join(loader.ModelPath, modelFile)
|
||||||
return "", nil, err
|
if _, err := os.Stat(mp); err == nil {
|
||||||
|
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
|
||||||
|
return "", nil, err
|
||||||
|
}
|
||||||
|
modelPath = mp
|
||||||
|
} else {
|
||||||
|
modelPath = modelFile
|
||||||
}
|
}
|
||||||
modelPath = mp
|
|
||||||
} else {
|
|
||||||
modelPath = backendConfig.Model // skip this step if it fails?????
|
|
||||||
}
|
}
|
||||||
|
|
||||||
res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
|
res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
|
||||||
|
|||||||
@@ -1,38 +0,0 @@
|
|||||||
package backend
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
|
||||||
)
|
|
||||||
|
|
||||||
func VAD(request *schema.VADRequest,
|
|
||||||
ctx context.Context,
|
|
||||||
ml *model.ModelLoader,
|
|
||||||
appConfig *config.ApplicationConfig,
|
|
||||||
backendConfig config.BackendConfig) (*schema.VADResponse, error) {
|
|
||||||
opts := ModelOptions(backendConfig, appConfig)
|
|
||||||
vadModel, err := ml.Load(opts...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
req := proto.VADRequest{
|
|
||||||
Audio: request.Audio,
|
|
||||||
}
|
|
||||||
resp, err := vadModel.VAD(ctx, &req)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
segments := []schema.VADSegment{}
|
|
||||||
for _, s := range resp.Segments {
|
|
||||||
segments = append(segments, schema.VADSegment{Start: s.Start, End: s.End})
|
|
||||||
}
|
|
||||||
|
|
||||||
return &schema.VADResponse{
|
|
||||||
Segments: segments,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
@@ -100,7 +100,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
|
|||||||
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
|
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
|
||||||
}
|
}
|
||||||
|
|
||||||
err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
|
err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ type RunCMD struct {
|
|||||||
|
|
||||||
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
|
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
|
||||||
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
|
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
|
||||||
|
RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
|
||||||
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
|
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
|
||||||
Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
|
Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
|
||||||
PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
|
PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
|
||||||
@@ -89,6 +90,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
|||||||
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
|
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
|
||||||
config.WithF16(r.F16),
|
config.WithF16(r.F16),
|
||||||
config.WithStringGalleries(r.Galleries),
|
config.WithStringGalleries(r.Galleries),
|
||||||
|
config.WithModelLibraryURL(r.RemoteLibrary),
|
||||||
config.WithCors(r.CORS),
|
config.WithCors(r.CORS),
|
||||||
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
|
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
|
||||||
config.WithCsrf(r.CSRF),
|
config.WithCsrf(r.CSRF),
|
||||||
|
|||||||
@@ -86,14 +86,13 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
|
|||||||
options := config.BackendConfig{}
|
options := config.BackendConfig{}
|
||||||
options.SetDefaults()
|
options.SetDefaults()
|
||||||
options.Backend = t.Backend
|
options.Backend = t.Backend
|
||||||
options.Model = t.Model
|
|
||||||
|
|
||||||
var inputFile *string
|
var inputFile *string
|
||||||
if t.InputFile != "" {
|
if t.InputFile != "" {
|
||||||
inputFile = &t.InputFile
|
inputFile = &t.InputFile
|
||||||
}
|
}
|
||||||
|
|
||||||
filePath, _, err := backend.SoundGeneration(text,
|
filePath, _, err := backend.SoundGeneration(t.Model, text,
|
||||||
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
|
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
|
||||||
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
|
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
|
||||||
|
|
||||||
|
|||||||
@@ -52,10 +52,8 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
|
|||||||
|
|
||||||
options := config.BackendConfig{}
|
options := config.BackendConfig{}
|
||||||
options.SetDefaults()
|
options.SetDefaults()
|
||||||
options.Backend = t.Backend
|
|
||||||
options.Model = t.Model
|
|
||||||
|
|
||||||
filePath, _, err := backend.ModelTTS(text, t.Voice, t.Language, ml, opts, options)
|
filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, t.Language, ml, opts, options)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -44,6 +44,8 @@ type ApplicationConfig struct {
|
|||||||
DisableGalleryEndpoint bool
|
DisableGalleryEndpoint bool
|
||||||
LoadToMemory []string
|
LoadToMemory []string
|
||||||
|
|
||||||
|
ModelLibraryURL string
|
||||||
|
|
||||||
Galleries []Gallery
|
Galleries []Gallery
|
||||||
|
|
||||||
BackendAssets embed.FS
|
BackendAssets embed.FS
|
||||||
@@ -124,6 +126,12 @@ func WithP2PToken(s string) AppOption {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func WithModelLibraryURL(url string) AppOption {
|
||||||
|
return func(o *ApplicationConfig) {
|
||||||
|
o.ModelLibraryURL = url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func WithLibPath(path string) AppOption {
|
func WithLibPath(path string) AppOption {
|
||||||
return func(o *ApplicationConfig) {
|
return func(o *ApplicationConfig) {
|
||||||
o.LibPath = path
|
o.LibPath = path
|
||||||
|
|||||||
@@ -287,8 +287,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
|||||||
defaultTopP := 0.95
|
defaultTopP := 0.95
|
||||||
defaultTopK := 40
|
defaultTopK := 40
|
||||||
defaultTemp := 0.9
|
defaultTemp := 0.9
|
||||||
// https://github.com/mudler/LocalAI/issues/2780
|
defaultMirostat := 2
|
||||||
defaultMirostat := 0
|
|
||||||
defaultMirostatTAU := 5.0
|
defaultMirostatTAU := 5.0
|
||||||
defaultMirostatETA := 0.1
|
defaultMirostatETA := 0.1
|
||||||
defaultTypicalP := 1.0
|
defaultTypicalP := 1.0
|
||||||
@@ -437,21 +436,19 @@ func (c *BackendConfig) HasTemplate() bool {
|
|||||||
type BackendConfigUsecases int
|
type BackendConfigUsecases int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
FLAG_ANY BackendConfigUsecases = 0b00000000000
|
FLAG_ANY BackendConfigUsecases = 0b000000000
|
||||||
FLAG_CHAT BackendConfigUsecases = 0b00000000001
|
FLAG_CHAT BackendConfigUsecases = 0b000000001
|
||||||
FLAG_COMPLETION BackendConfigUsecases = 0b00000000010
|
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
|
||||||
FLAG_EDIT BackendConfigUsecases = 0b00000000100
|
FLAG_EDIT BackendConfigUsecases = 0b000000100
|
||||||
FLAG_EMBEDDINGS BackendConfigUsecases = 0b00000001000
|
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
|
||||||
FLAG_RERANK BackendConfigUsecases = 0b00000010000
|
FLAG_RERANK BackendConfigUsecases = 0b000010000
|
||||||
FLAG_IMAGE BackendConfigUsecases = 0b00000100000
|
FLAG_IMAGE BackendConfigUsecases = 0b000100000
|
||||||
FLAG_TRANSCRIPT BackendConfigUsecases = 0b00001000000
|
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
|
||||||
FLAG_TTS BackendConfigUsecases = 0b00010000000
|
FLAG_TTS BackendConfigUsecases = 0b010000000
|
||||||
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b00100000000
|
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
|
||||||
FLAG_TOKENIZE BackendConfigUsecases = 0b01000000000
|
|
||||||
FLAG_VAD BackendConfigUsecases = 0b10000000000
|
|
||||||
|
|
||||||
// Common Subsets
|
// Common Subsets
|
||||||
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
|
FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT
|
||||||
)
|
)
|
||||||
|
|
||||||
func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
|
func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
|
||||||
@@ -466,8 +463,6 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
|
|||||||
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
|
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
|
||||||
"FLAG_TTS": FLAG_TTS,
|
"FLAG_TTS": FLAG_TTS,
|
||||||
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
|
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
|
||||||
"FLAG_TOKENIZE": FLAG_TOKENIZE,
|
|
||||||
"FLAG_VAD": FLAG_VAD,
|
|
||||||
"FLAG_LLM": FLAG_LLM,
|
"FLAG_LLM": FLAG_LLM,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -553,18 +548,5 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (u & FLAG_TOKENIZE) == FLAG_TOKENIZE {
|
|
||||||
tokenizeCapableBackends := []string{"llama.cpp", "rwkv"}
|
|
||||||
if !slices.Contains(tokenizeCapableBackends, c.Backend) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (u & FLAG_VAD) == FLAG_VAD {
|
|
||||||
if c.Backend != "silero-vad" {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -81,10 +81,10 @@ func readMultipleBackendConfigsFromFile(file string, opts ...ConfigLoaderOption)
|
|||||||
c := &[]*BackendConfig{}
|
c := &[]*BackendConfig{}
|
||||||
f, err := os.ReadFile(file)
|
f, err := os.ReadFile(file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("readMultipleBackendConfigsFromFile cannot read config file %q: %w", file, err)
|
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||||
}
|
}
|
||||||
if err := yaml.Unmarshal(f, c); err != nil {
|
if err := yaml.Unmarshal(f, c); err != nil {
|
||||||
return nil, fmt.Errorf("readMultipleBackendConfigsFromFile cannot unmarshal config file %q: %w", file, err)
|
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, cc := range *c {
|
for _, cc := range *c {
|
||||||
@@ -101,10 +101,10 @@ func readBackendConfigFromFile(file string, opts ...ConfigLoaderOption) (*Backen
|
|||||||
c := &BackendConfig{}
|
c := &BackendConfig{}
|
||||||
f, err := os.ReadFile(file)
|
f, err := os.ReadFile(file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("readBackendConfigFromFile cannot read config file %q: %w", file, err)
|
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||||
}
|
}
|
||||||
if err := yaml.Unmarshal(f, c); err != nil {
|
if err := yaml.Unmarshal(f, c); err != nil {
|
||||||
return nil, fmt.Errorf("readBackendConfigFromFile cannot unmarshal config file %q: %w", file, err)
|
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
c.SetDefaults(opts...)
|
c.SetDefaults(opts...)
|
||||||
@@ -117,9 +117,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
|
|||||||
// Load a config file if present after the model name
|
// Load a config file if present after the model name
|
||||||
cfg := &BackendConfig{
|
cfg := &BackendConfig{
|
||||||
PredictionOptions: schema.PredictionOptions{
|
PredictionOptions: schema.PredictionOptions{
|
||||||
BasicModelRequest: schema.BasicModelRequest{
|
Model: modelName,
|
||||||
Model: modelName,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -147,15 +145,6 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
|
|||||||
return cfg, nil
|
return cfg, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bcl *BackendConfigLoader) LoadBackendConfigFileByNameDefaultOptions(modelName string, appConfig *ApplicationConfig) (*BackendConfig, error) {
|
|
||||||
return bcl.LoadBackendConfigFileByName(modelName, appConfig.ModelPath,
|
|
||||||
LoadOptionDebug(appConfig.Debug),
|
|
||||||
LoadOptionThreads(appConfig.Threads),
|
|
||||||
LoadOptionContextSize(appConfig.ContextSize),
|
|
||||||
LoadOptionF16(appConfig.F16),
|
|
||||||
ModelPath(appConfig.ModelPath))
|
|
||||||
}
|
|
||||||
|
|
||||||
// This format is currently only used when reading a single file at startup, passed in via ApplicationConfig.ConfigFile
|
// This format is currently only used when reading a single file at startup, passed in via ApplicationConfig.ConfigFile
|
||||||
func (bcl *BackendConfigLoader) LoadMultipleBackendConfigsSingleFile(file string, opts ...ConfigLoaderOption) error {
|
func (bcl *BackendConfigLoader) LoadMultipleBackendConfigsSingleFile(file string, opts ...ConfigLoaderOption) error {
|
||||||
bcl.Lock()
|
bcl.Lock()
|
||||||
@@ -178,7 +167,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoa
|
|||||||
defer bcl.Unlock()
|
defer bcl.Unlock()
|
||||||
c, err := readBackendConfigFromFile(file, opts...)
|
c, err := readBackendConfigFromFile(file, opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("LoadBackendConfig cannot read config file %q: %w", file, err)
|
return fmt.Errorf("cannot read config file: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if c.Validate() {
|
if c.Validate() {
|
||||||
@@ -335,10 +324,9 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error {
|
|||||||
func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
|
func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
|
||||||
bcl.Lock()
|
bcl.Lock()
|
||||||
defer bcl.Unlock()
|
defer bcl.Unlock()
|
||||||
|
|
||||||
entries, err := os.ReadDir(path)
|
entries, err := os.ReadDir(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("LoadBackendConfigsFromPath cannot read directory '%s': %w", path, err)
|
return fmt.Errorf("cannot read directory '%s': %w", path, err)
|
||||||
}
|
}
|
||||||
files := make([]fs.FileInfo, 0, len(entries))
|
files := make([]fs.FileInfo, 0, len(entries))
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
@@ -356,13 +344,13 @@ func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...
|
|||||||
}
|
}
|
||||||
c, err := readBackendConfigFromFile(filepath.Join(path, file.Name()), opts...)
|
c, err := readBackendConfigFromFile(filepath.Join(path, file.Name()), opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Err(err).Str("File Name", file.Name()).Msgf("LoadBackendConfigsFromPath cannot read config file")
|
log.Error().Err(err).Msgf("cannot read config file: %s", file.Name())
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if c.Validate() {
|
if c.Validate() {
|
||||||
bcl.configs[c.Name] = *c
|
bcl.configs[c.Name] = *c
|
||||||
} else {
|
} else {
|
||||||
log.Error().Err(err).Str("Name", c.Name).Msgf("config is not valid")
|
log.Error().Err(err).Msgf("config is not valid")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -48,9 +48,9 @@ parameters:
|
|||||||
Expect(config.Name).To(Equal("bar-baz"))
|
Expect(config.Name).To(Equal("bar-baz"))
|
||||||
Expect(config.Validate()).To(BeTrue())
|
Expect(config.Validate()).To(BeTrue())
|
||||||
|
|
||||||
// download https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml
|
// download https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml
|
||||||
httpClient := http.Client{}
|
httpClient := http.Client{}
|
||||||
resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml")
|
resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml")
|
||||||
Expect(err).To(BeNil())
|
Expect(err).To(BeNil())
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
tmp, err = os.CreateTemp("", "config.yaml")
|
tmp, err = os.CreateTemp("", "config.yaml")
|
||||||
|
|||||||
@@ -161,11 +161,10 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We try to guess only if we don't have a template defined already
|
// We try to guess only if we don't have a template defined already
|
||||||
guessPath := filepath.Join(modelPath, cfg.ModelFileName())
|
f, err := gguf.ParseGGUFFile(filepath.Join(modelPath, cfg.ModelFileName()))
|
||||||
f, err := gguf.ParseGGUFFile(guessPath)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Only valid for gguf files
|
// Only valid for gguf files
|
||||||
log.Debug().Str("filePath", guessPath).Msg("guessDefaultsFromFile: not a GGUF file")
|
log.Debug().Msgf("guessDefaultsFromFile: %s", "not a GGUF file")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -48,10 +48,8 @@ var _ = Describe("Model test", func() {
|
|||||||
defer os.RemoveAll(tempdir)
|
defer os.RemoveAll(tempdir)
|
||||||
|
|
||||||
gallery := []GalleryModel{{
|
gallery := []GalleryModel{{
|
||||||
Metadata: Metadata{
|
Name: "bert",
|
||||||
Name: "bert",
|
URL: bertEmbeddingsURL,
|
||||||
URL: bertEmbeddingsURL,
|
|
||||||
},
|
|
||||||
}}
|
}}
|
||||||
out, err := yaml.Marshal(gallery)
|
out, err := yaml.Marshal(gallery)
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|||||||
@@ -11,14 +11,6 @@ import (
|
|||||||
// It is used to install the model by resolving the URL and downloading the files.
|
// It is used to install the model by resolving the URL and downloading the files.
|
||||||
// The other fields are used to override the configuration of the model.
|
// The other fields are used to override the configuration of the model.
|
||||||
type GalleryModel struct {
|
type GalleryModel struct {
|
||||||
Metadata `json:",inline" yaml:",inline"`
|
|
||||||
// config_file is read in the situation where URL is blank - and therefore this is a base config.
|
|
||||||
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
|
|
||||||
// Overrides are used to override the configuration of the model located at URL
|
|
||||||
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type Metadata struct {
|
|
||||||
URL string `json:"url,omitempty" yaml:"url,omitempty"`
|
URL string `json:"url,omitempty" yaml:"url,omitempty"`
|
||||||
Name string `json:"name,omitempty" yaml:"name,omitempty"`
|
Name string `json:"name,omitempty" yaml:"name,omitempty"`
|
||||||
Description string `json:"description,omitempty" yaml:"description,omitempty"`
|
Description string `json:"description,omitempty" yaml:"description,omitempty"`
|
||||||
@@ -26,6 +18,10 @@ type Metadata struct {
|
|||||||
URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"`
|
URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"`
|
||||||
Icon string `json:"icon,omitempty" yaml:"icon,omitempty"`
|
Icon string `json:"icon,omitempty" yaml:"icon,omitempty"`
|
||||||
Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`
|
Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`
|
||||||
|
// config_file is read in the situation where URL is blank - and therefore this is a base config.
|
||||||
|
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
|
||||||
|
// Overrides are used to override the configuration of the model located at URL
|
||||||
|
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
|
||||||
// AdditionalFiles are used to add additional files to the model
|
// AdditionalFiles are used to add additional files to the model
|
||||||
AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"`
|
AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"`
|
||||||
// Gallery is a reference to the gallery which contains the model
|
// Gallery is a reference to the gallery which contains the model
|
||||||
|
|||||||
@@ -9,11 +9,7 @@ import (
|
|||||||
var _ = Describe("Gallery API tests", func() {
|
var _ = Describe("Gallery API tests", func() {
|
||||||
Context("requests", func() {
|
Context("requests", func() {
|
||||||
It("parses github with a branch", func() {
|
It("parses github with a branch", func() {
|
||||||
req := GalleryModel{
|
req := GalleryModel{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"}
|
||||||
Metadata: Metadata{
|
|
||||||
URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
e, err := GetGalleryConfigFromURL(req.URL, "")
|
e, err := GetGalleryConfigFromURL(req.URL, "")
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(e.Name).To(Equal("gpt4all-j"))
|
Expect(e.Name).To(Equal("gpt4all-j"))
|
||||||
|
|||||||
@@ -130,6 +130,7 @@ func API(application *application.Application) (*fiber.App, error) {
|
|||||||
return metricsService.Shutdown()
|
return metricsService.Shutdown()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
// Health Checks should always be exempt from auth, so register these first
|
// Health Checks should always be exempt from auth, so register these first
|
||||||
routes.HealthRoutes(router)
|
routes.HealthRoutes(router)
|
||||||
@@ -166,15 +167,13 @@ func API(application *application.Application) (*fiber.App, error) {
|
|||||||
galleryService := services.NewGalleryService(application.ApplicationConfig())
|
galleryService := services.NewGalleryService(application.ApplicationConfig())
|
||||||
galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
|
galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
|
||||||
|
|
||||||
requestExtractor := middleware.NewRequestExtractor(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||||
|
routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
|
||||||
routes.RegisterElevenLabsRoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
routes.RegisterOpenAIRoutes(router, application)
|
||||||
routes.RegisterLocalAIRoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
|
|
||||||
routes.RegisterOpenAIRoutes(router, requestExtractor, application)
|
|
||||||
if !application.ApplicationConfig().DisableWebUI {
|
if !application.ApplicationConfig().DisableWebUI {
|
||||||
routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
|
routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
|
||||||
}
|
}
|
||||||
routes.RegisterJINARoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||||
|
|
||||||
httpFS := http.FS(embedDirStatic)
|
httpFS := http.FS(embedDirStatic)
|
||||||
|
|
||||||
|
|||||||
@@ -299,18 +299,14 @@ var _ = Describe("API test", func() {
|
|||||||
|
|
||||||
g := []gallery.GalleryModel{
|
g := []gallery.GalleryModel{
|
||||||
{
|
{
|
||||||
Metadata: gallery.Metadata{
|
Name: "bert",
|
||||||
Name: "bert",
|
URL: bertEmbeddingsURL,
|
||||||
URL: bertEmbeddingsURL,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Metadata: gallery.Metadata{
|
Name: "bert2",
|
||||||
Name: "bert2",
|
URL: bertEmbeddingsURL,
|
||||||
URL: bertEmbeddingsURL,
|
Overrides: map[string]interface{}{"foo": "bar"},
|
||||||
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
|
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
|
||||||
},
|
|
||||||
Overrides: map[string]interface{}{"foo": "bar"},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
out, err := yaml.Marshal(g)
|
out, err := yaml.Marshal(g)
|
||||||
@@ -480,7 +476,7 @@ var _ = Describe("API test", func() {
|
|||||||
})
|
})
|
||||||
It("apply models from config", func() {
|
It("apply models from config", func() {
|
||||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||||
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml",
|
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
|
||||||
})
|
})
|
||||||
|
|
||||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||||
@@ -526,6 +522,77 @@ var _ = Describe("API test", func() {
|
|||||||
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
|
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
|
||||||
|
if runtime.GOOS != "linux" {
|
||||||
|
Skip("test supported only on linux")
|
||||||
|
}
|
||||||
|
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||||
|
URL: "github:go-skynet/model-gallery/openllama_3b.yaml",
|
||||||
|
Name: "openllama_3b",
|
||||||
|
Overrides: map[string]interface{}{"backend": "llama-ggml", "mmap": true, "f16": true, "context_size": 128},
|
||||||
|
})
|
||||||
|
|
||||||
|
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||||
|
|
||||||
|
uuid := response["uuid"].(string)
|
||||||
|
|
||||||
|
Eventually(func() bool {
|
||||||
|
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
|
||||||
|
return response["processed"].(bool)
|
||||||
|
}, "360s", "10s").Should(Equal(true))
|
||||||
|
|
||||||
|
By("testing completion")
|
||||||
|
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "openllama_3b", Prompt: "Count up to five: one, two, three, four, "})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(len(resp.Choices)).To(Equal(1))
|
||||||
|
Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
|
||||||
|
|
||||||
|
By("testing functions")
|
||||||
|
resp2, err := client.CreateChatCompletion(
|
||||||
|
context.TODO(),
|
||||||
|
openai.ChatCompletionRequest{
|
||||||
|
Model: "openllama_3b",
|
||||||
|
Messages: []openai.ChatCompletionMessage{
|
||||||
|
{
|
||||||
|
Role: "user",
|
||||||
|
Content: "What is the weather like in San Francisco (celsius)?",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Functions: []openai.FunctionDefinition{
|
||||||
|
openai.FunctionDefinition{
|
||||||
|
Name: "get_current_weather",
|
||||||
|
Description: "Get the current weather",
|
||||||
|
Parameters: jsonschema.Definition{
|
||||||
|
Type: jsonschema.Object,
|
||||||
|
Properties: map[string]jsonschema.Definition{
|
||||||
|
"location": {
|
||||||
|
Type: jsonschema.String,
|
||||||
|
Description: "The city and state, e.g. San Francisco, CA",
|
||||||
|
},
|
||||||
|
"unit": {
|
||||||
|
Type: jsonschema.String,
|
||||||
|
Enum: []string{"celcius", "fahrenheit"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Required: []string{"location"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(len(resp2.Choices)).To(Equal(1))
|
||||||
|
Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
|
||||||
|
Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
|
||||||
|
|
||||||
|
var res map[string]string
|
||||||
|
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
|
||||||
|
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
|
||||||
|
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
|
It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
|
||||||
if runtime.GOOS != "linux" {
|
if runtime.GOOS != "linux" {
|
||||||
Skip("test supported only on linux")
|
Skip("test supported only on linux")
|
||||||
@@ -533,7 +600,7 @@ var _ = Describe("API test", func() {
|
|||||||
|
|
||||||
modelName := "hermes-2-pro-mistral"
|
modelName := "hermes-2-pro-mistral"
|
||||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||||
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml",
|
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
|
||||||
})
|
})
|
||||||
|
|
||||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||||
|
|||||||
47
core/http/ctx/fiber.go
Normal file
47
core/http/ctx/fiber.go
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
package fiberContext
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/services"
|
||||||
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ModelFromContext returns the model from the context
|
||||||
|
// If no model is specified, it will take the first available
|
||||||
|
// Takes a model string as input which should be the one received from the user request.
|
||||||
|
// It returns the model name resolved from the context and an error if any.
|
||||||
|
func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
|
||||||
|
if ctx.Params("model") != "" {
|
||||||
|
modelInput = ctx.Params("model")
|
||||||
|
}
|
||||||
|
if ctx.Query("model") != "" {
|
||||||
|
modelInput = ctx.Query("model")
|
||||||
|
}
|
||||||
|
// Set model from bearer token, if available
|
||||||
|
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // Reduced duplicate characters of Bearer
|
||||||
|
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||||
|
|
||||||
|
// If no model was specified, take the first available
|
||||||
|
if modelInput == "" && !bearerExists && firstModel {
|
||||||
|
models, _ := services.ListModels(cl, loader, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||||
|
if len(models) > 0 {
|
||||||
|
modelInput = models[0]
|
||||||
|
log.Debug().Msgf("No model specified, using: %s", modelInput)
|
||||||
|
} else {
|
||||||
|
log.Debug().Msgf("No model specified, returning error")
|
||||||
|
return "", fmt.Errorf("no model specified")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If a model is found in bearer token takes precedence
|
||||||
|
if bearerExists {
|
||||||
|
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
||||||
|
modelInput = bearer
|
||||||
|
}
|
||||||
|
return modelInput, nil
|
||||||
|
}
|
||||||
@@ -4,7 +4,7 @@ import (
|
|||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
@@ -17,21 +17,45 @@ import (
|
|||||||
// @Router /v1/sound-generation [post]
|
// @Router /v1/sound-generation [post]
|
||||||
func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
|
input := new(schema.ElevenLabsSoundGenerationRequest)
|
||||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.ElevenLabsSoundGenerationRequest)
|
// Get input data from the request body
|
||||||
if !ok || input.ModelID == "" {
|
if err := c.BodyParser(input); err != nil {
|
||||||
return fiber.ErrBadRequest
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
|
||||||
if !ok || cfg == nil {
|
if err != nil {
|
||||||
return fiber.ErrBadRequest
|
modelFile = input.ModelID
|
||||||
|
log.Warn().Str("ModelID", input.ModelID).Msg("Model not found in context")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||||
|
config.LoadOptionDebug(appConfig.Debug),
|
||||||
|
config.LoadOptionThreads(appConfig.Threads),
|
||||||
|
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||||
|
config.LoadOptionF16(appConfig.F16),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
modelFile = input.ModelID
|
||||||
|
log.Warn().Str("Request ModelID", input.ModelID).Err(err).Msg("error during LoadBackendConfigFileByName, using request ModelID")
|
||||||
|
} else {
|
||||||
|
if input.ModelID != "" {
|
||||||
|
modelFile = input.ModelID
|
||||||
|
} else {
|
||||||
|
modelFile = cfg.Model
|
||||||
|
}
|
||||||
|
}
|
||||||
log.Debug().Str("modelFile", "modelFile").Str("backend", cfg.Backend).Msg("Sound Generation Request about to be sent to backend")
|
log.Debug().Str("modelFile", "modelFile").Str("backend", cfg.Backend).Msg("Sound Generation Request about to be sent to backend")
|
||||||
|
|
||||||
|
if input.Duration != nil {
|
||||||
|
log.Debug().Float32("duration", *input.Duration).Msg("duration set")
|
||||||
|
}
|
||||||
|
if input.Temperature != nil {
|
||||||
|
log.Debug().Float32("temperature", *input.Temperature).Msg("temperature set")
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: Support uploading files?
|
// TODO: Support uploading files?
|
||||||
filePath, _, err := backend.SoundGeneration(input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
|
filePath, _, err := backend.SoundGeneration(modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ package elevenlabs
|
|||||||
import (
|
import (
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
@@ -20,21 +20,39 @@ import (
|
|||||||
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
|
input := new(schema.ElevenLabsTTSRequest)
|
||||||
voiceID := c.Params("voice-id")
|
voiceID := c.Params("voice-id")
|
||||||
|
|
||||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.ElevenLabsTTSRequest)
|
// Get input data from the request body
|
||||||
if !ok || input.ModelID == "" {
|
if err := c.BodyParser(input); err != nil {
|
||||||
return fiber.ErrBadRequest
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
|
||||||
if !ok || cfg == nil {
|
if err != nil {
|
||||||
return fiber.ErrBadRequest
|
modelFile = input.ModelID
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.ModelID)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Str("modelName", input.ModelID).Msg("elevenlabs TTS request recieved")
|
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||||
|
config.LoadOptionDebug(appConfig.Debug),
|
||||||
|
config.LoadOptionThreads(appConfig.Threads),
|
||||||
|
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||||
|
config.LoadOptionF16(appConfig.F16),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
modelFile = input.ModelID
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.ModelID)
|
||||||
|
} else {
|
||||||
|
if input.ModelID != "" {
|
||||||
|
modelFile = input.ModelID
|
||||||
|
} else {
|
||||||
|
modelFile = cfg.Model
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||||
|
|
||||||
filePath, _, err := backend.ModelTTS(input.Text, voiceID, input.LanguageCode, ml, appConfig, *cfg)
|
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, "", voiceID, ml, appConfig, *cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,9 +3,9 @@ package jina
|
|||||||
import (
|
import (
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
@@ -19,32 +19,58 @@ import (
|
|||||||
// @Router /v1/rerank [post]
|
// @Router /v1/rerank [post]
|
||||||
func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
|
req := new(schema.JINARerankRequest)
|
||||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.JINARerankRequest)
|
if err := c.BodyParser(req); err != nil {
|
||||||
if !ok || input.Model == "" {
|
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
|
||||||
return fiber.ErrBadRequest
|
"error": "Cannot parse JSON",
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
input := new(schema.TTSRequest)
|
||||||
if !ok || cfg == nil {
|
|
||||||
return fiber.ErrBadRequest
|
// Get input data from the request body
|
||||||
|
if err := c.BodyParser(input); err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Str("model", input.Model).Msg("JINA Rerank Request recieved")
|
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||||
|
if err != nil {
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||||
|
config.LoadOptionDebug(appConfig.Debug),
|
||||||
|
config.LoadOptionThreads(appConfig.Threads),
|
||||||
|
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||||
|
config.LoadOptionF16(appConfig.F16),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
} else {
|
||||||
|
modelFile = cfg.Model
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||||
|
|
||||||
|
if input.Backend != "" {
|
||||||
|
cfg.Backend = input.Backend
|
||||||
|
}
|
||||||
|
|
||||||
request := &proto.RerankRequest{
|
request := &proto.RerankRequest{
|
||||||
Query: input.Query,
|
Query: req.Query,
|
||||||
TopN: int32(input.TopN),
|
TopN: int32(req.TopN),
|
||||||
Documents: input.Documents,
|
Documents: req.Documents,
|
||||||
}
|
}
|
||||||
|
|
||||||
results, err := backend.Rerank(request, ml, appConfig, *cfg)
|
results, err := backend.Rerank(modelFile, request, ml, appConfig, *cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
response := &schema.JINARerankResponse{
|
response := &schema.JINARerankResponse{
|
||||||
Model: input.Model,
|
Model: req.Model,
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, r := range results.Results {
|
for _, r := range results.Results {
|
||||||
|
|||||||
@@ -117,25 +117,19 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
|
|||||||
// @Router /models/available [get]
|
// @Router /models/available [get]
|
||||||
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
|
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
|
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
|
||||||
|
|
||||||
models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
|
models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
log.Debug().Msgf("Models found from galleries: %+v", models)
|
||||||
log.Debug().Msgf("Available %d models from %d galleries\n", len(models), len(mgs.galleries))
|
for _, m := range models {
|
||||||
|
log.Debug().Msgf("Model found from galleries: %+v", m)
|
||||||
m := []gallery.Metadata{}
|
|
||||||
|
|
||||||
for _, mm := range models {
|
|
||||||
m = append(m, mm.Metadata)
|
|
||||||
}
|
}
|
||||||
|
dat, err := json.Marshal(models)
|
||||||
log.Debug().Msgf("Models %#v", m)
|
|
||||||
|
|
||||||
dat, err := json.Marshal(m)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("could not marshal models: %w", err)
|
return err
|
||||||
}
|
}
|
||||||
return c.Send(dat)
|
return c.Send(dat)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,15 +4,13 @@ import (
|
|||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TODO: This is not yet in use. Needs middleware rework, since it is not referenced.
|
|
||||||
|
|
||||||
// TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID
|
// TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID
|
||||||
//
|
//
|
||||||
// @Summary Get TokenMetrics for Active Slot.
|
// @Summary Get TokenMetrics for Active Slot.
|
||||||
@@ -31,13 +29,18 @@ func TokenMetricsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader,
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
modelFile, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||||
if !ok || modelFile != "" {
|
if err != nil {
|
||||||
modelFile = input.Model
|
modelFile = input.Model
|
||||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg, err := cl.LoadBackendConfigFileByNameDefaultOptions(modelFile, appConfig)
|
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||||
|
config.LoadOptionDebug(appConfig.Debug),
|
||||||
|
config.LoadOptionThreads(appConfig.Threads),
|
||||||
|
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||||
|
config.LoadOptionF16(appConfig.F16),
|
||||||
|
)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Err(err)
|
log.Err(err)
|
||||||
|
|||||||
@@ -4,32 +4,55 @@ import (
|
|||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TokenizeEndpoint exposes a REST API to tokenize the content
|
// TokenizeEndpoint exposes a REST API to tokenize the content
|
||||||
// @Summary Tokenize the input.
|
// @Summary Tokenize the input.
|
||||||
// @Param request body schema.TokenizeRequest true "Request"
|
|
||||||
// @Success 200 {object} schema.TokenizeResponse "Response"
|
// @Success 200 {object} schema.TokenizeResponse "Response"
|
||||||
// @Router /v1/tokenize [post]
|
// @Router /v1/tokenize [post]
|
||||||
func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
return func(ctx *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
input, ok := ctx.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.TokenizeRequest)
|
|
||||||
if !ok || input.Model == "" {
|
input := new(schema.TokenizeRequest)
|
||||||
return fiber.ErrBadRequest
|
|
||||||
|
// Get input data from the request body
|
||||||
|
if err := c.BodyParser(input); err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg, ok := ctx.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||||
if !ok || cfg == nil {
|
if err != nil {
|
||||||
return fiber.ErrBadRequest
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||||
|
config.LoadOptionDebug(appConfig.Debug),
|
||||||
|
config.LoadOptionThreads(appConfig.Threads),
|
||||||
|
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||||
|
config.LoadOptionF16(appConfig.F16),
|
||||||
|
)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Err(err)
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
} else {
|
||||||
|
modelFile = cfg.Model
|
||||||
|
}
|
||||||
|
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||||
|
|
||||||
tokenResponse, err := backend.ModelTokenize(input.Content, ml, *cfg, appConfig)
|
tokenResponse, err := backend.ModelTokenize(input.Content, ml, *cfg, appConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return ctx.JSON(tokenResponse)
|
|
||||||
|
c.JSON(tokenResponse)
|
||||||
|
return nil
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ package localai
|
|||||||
import (
|
import (
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
@@ -24,24 +24,37 @@ import (
|
|||||||
// @Router /tts [post]
|
// @Router /tts [post]
|
||||||
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.TTSRequest)
|
input := new(schema.TTSRequest)
|
||||||
if !ok || input.Model == "" {
|
|
||||||
return fiber.ErrBadRequest
|
// Get input data from the request body
|
||||||
|
if err := c.BodyParser(input); err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||||
if !ok || cfg == nil {
|
if err != nil {
|
||||||
return fiber.ErrBadRequest
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Str("model", input.Model).Msg("LocalAI TTS Request recieved")
|
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||||
|
config.LoadOptionDebug(appConfig.Debug),
|
||||||
|
config.LoadOptionThreads(appConfig.Threads),
|
||||||
|
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||||
|
config.LoadOptionF16(appConfig.F16),
|
||||||
|
)
|
||||||
|
|
||||||
if cfg.Backend == "" {
|
if err != nil {
|
||||||
if input.Backend != "" {
|
log.Err(err)
|
||||||
cfg.Backend = input.Backend
|
modelFile = input.Model
|
||||||
} else {
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
cfg.Backend = model.PiperBackend
|
} else {
|
||||||
}
|
modelFile = cfg.Model
|
||||||
|
}
|
||||||
|
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||||
|
|
||||||
|
if input.Backend != "" {
|
||||||
|
cfg.Backend = input.Backend
|
||||||
}
|
}
|
||||||
|
|
||||||
if input.Language != "" {
|
if input.Language != "" {
|
||||||
@@ -52,7 +65,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
|
|||||||
cfg.Voice = input.Voice
|
cfg.Voice = input.Voice
|
||||||
}
|
}
|
||||||
|
|
||||||
filePath, _, err := backend.ModelTTS(input.Input, cfg.Voice, cfg.Language, ml, appConfig, *cfg)
|
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, cfg.Voice, cfg.Language, ml, appConfig, *cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,8 +4,9 @@ import (
|
|||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
@@ -18,20 +19,45 @@ import (
|
|||||||
// @Router /vad [post]
|
// @Router /vad [post]
|
||||||
func VADEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func VADEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.VADRequest)
|
input := new(schema.VADRequest)
|
||||||
if !ok || input.Model == "" {
|
|
||||||
return fiber.ErrBadRequest
|
// Get input data from the request body
|
||||||
|
if err := c.BodyParser(input); err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||||
if !ok || cfg == nil {
|
if err != nil {
|
||||||
return fiber.ErrBadRequest
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Str("model", input.Model).Msg("LocalAI VAD Request recieved")
|
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||||
|
config.LoadOptionDebug(appConfig.Debug),
|
||||||
|
config.LoadOptionThreads(appConfig.Threads),
|
||||||
|
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||||
|
config.LoadOptionF16(appConfig.F16),
|
||||||
|
)
|
||||||
|
|
||||||
resp, err := backend.VAD(input, c.Context(), ml, appConfig, *cfg)
|
if err != nil {
|
||||||
|
log.Err(err)
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
} else {
|
||||||
|
modelFile = cfg.Model
|
||||||
|
}
|
||||||
|
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||||
|
|
||||||
|
opts := backend.ModelOptions(*cfg, appConfig, model.WithBackendString(cfg.Backend), model.WithModel(modelFile))
|
||||||
|
|
||||||
|
vadModel, err := ml.Load(opts...)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
req := proto.VADRequest{
|
||||||
|
Audio: input.Audio,
|
||||||
|
}
|
||||||
|
resp, err := vadModel.VAD(c.Context(), &req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,19 +5,18 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/pkg/functions"
|
"github.com/mudler/LocalAI/pkg/functions"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
|
||||||
"github.com/mudler/LocalAI/pkg/templates"
|
"github.com/mudler/LocalAI/pkg/templates"
|
||||||
|
|
||||||
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
"github.com/valyala/fasthttp"
|
"github.com/valyala/fasthttp"
|
||||||
)
|
)
|
||||||
@@ -175,20 +174,26 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
|||||||
textContentToReturn = ""
|
textContentToReturn = ""
|
||||||
id = uuid.New().String()
|
id = uuid.New().String()
|
||||||
created = int(time.Now().Unix())
|
created = int(time.Now().Unix())
|
||||||
|
// Set CorrelationID
|
||||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
correlationID := c.Get("X-Correlation-ID")
|
||||||
if !ok || input.Model == "" {
|
if len(strings.TrimSpace(correlationID)) == 0 {
|
||||||
return fiber.ErrBadRequest
|
correlationID = id
|
||||||
}
|
}
|
||||||
|
c.Set("X-Correlation-ID", correlationID)
|
||||||
|
|
||||||
|
// Opt-in extra usage flag
|
||||||
extraUsage := c.Get("Extra-Usage", "") != ""
|
extraUsage := c.Get("Extra-Usage", "") != ""
|
||||||
|
|
||||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
||||||
if !ok || config == nil {
|
if err != nil {
|
||||||
return fiber.ErrBadRequest
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Chat endpoint configuration read: %+v", config)
|
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, startupOptions.Debug, startupOptions.Threads, startupOptions.ContextSize, startupOptions.F16)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
|
}
|
||||||
|
log.Debug().Msgf("Configuration read: %+v", config)
|
||||||
|
|
||||||
funcs := input.Functions
|
funcs := input.Functions
|
||||||
shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions()
|
shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions()
|
||||||
@@ -396,11 +401,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
|||||||
log.Debug().Msgf("Text content to return: %s", textContentToReturn)
|
log.Debug().Msgf("Text content to return: %s", textContentToReturn)
|
||||||
noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0
|
noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0
|
||||||
|
|
||||||
finishReason := "stop"
|
|
||||||
if len(input.Tools) > 0 {
|
|
||||||
finishReason = "tool_calls"
|
|
||||||
}
|
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case noActionsToRun:
|
case noActionsToRun:
|
||||||
result, err := handleQuestion(config, input, ml, startupOptions, results, s, predInput)
|
result, err := handleQuestion(config, input, ml, startupOptions, results, s, predInput)
|
||||||
@@ -408,18 +408,19 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
|||||||
log.Error().Err(err).Msg("error handling question")
|
log.Error().Err(err).Msg("error handling question")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
*c = append(*c, schema.Choice{
|
*c = append(*c, schema.Choice{
|
||||||
FinishReason: finishReason,
|
Message: &schema.Message{Role: "assistant", Content: &result}})
|
||||||
Message: &schema.Message{Role: "assistant", Content: &result}})
|
|
||||||
default:
|
default:
|
||||||
toolChoice := schema.Choice{
|
toolChoice := schema.Choice{
|
||||||
FinishReason: finishReason,
|
|
||||||
Message: &schema.Message{
|
Message: &schema.Message{
|
||||||
Role: "assistant",
|
Role: "assistant",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(input.Tools) > 0 {
|
||||||
|
toolChoice.FinishReason = "tool_calls"
|
||||||
|
}
|
||||||
|
|
||||||
for _, ss := range results {
|
for _, ss := range results {
|
||||||
name, args := ss.Name, ss.Arguments
|
name, args := ss.Name, ss.Arguments
|
||||||
if len(input.Tools) > 0 {
|
if len(input.Tools) > 0 {
|
||||||
@@ -437,7 +438,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
// otherwise we return more choices directly (deprecated)
|
// otherwise we return more choices directly
|
||||||
*c = append(*c, schema.Choice{
|
*c = append(*c, schema.Choice{
|
||||||
FinishReason: "function_call",
|
FinishReason: "function_call",
|
||||||
Message: &schema.Message{
|
Message: &schema.Message{
|
||||||
@@ -538,7 +539,7 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
|
|||||||
audios = append(audios, m.StringAudios...)
|
audios = append(audios, m.StringAudios...)
|
||||||
}
|
}
|
||||||
|
|
||||||
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, config, o, nil)
|
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, *config, o, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Err(err).Msg("model inference failed")
|
log.Error().Err(err).Msg("model inference failed")
|
||||||
return "", err
|
return "", err
|
||||||
|
|||||||
@@ -10,13 +10,12 @@ import (
|
|||||||
|
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/pkg/functions"
|
"github.com/mudler/LocalAI/pkg/functions"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
"github.com/mudler/LocalAI/pkg/templates"
|
"github.com/mudler/LocalAI/pkg/templates"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
"github.com/valyala/fasthttp"
|
"github.com/valyala/fasthttp"
|
||||||
@@ -28,9 +27,10 @@ import (
|
|||||||
// @Success 200 {object} schema.OpenAIResponse "Response"
|
// @Success 200 {object} schema.OpenAIResponse "Response"
|
||||||
// @Router /v1/completions [post]
|
// @Router /v1/completions [post]
|
||||||
func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
|
id := uuid.New().String()
|
||||||
created := int(time.Now().Unix())
|
created := int(time.Now().Unix())
|
||||||
|
|
||||||
process := func(id string, s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
|
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
|
||||||
ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
|
ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
|
||||||
usage := schema.OpenAIUsage{
|
usage := schema.OpenAIUsage{
|
||||||
PromptTokens: tokenUsage.Prompt,
|
PromptTokens: tokenUsage.Prompt,
|
||||||
@@ -63,18 +63,22 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
|
|||||||
}
|
}
|
||||||
|
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
// Handle Correlation
|
// Add Correlation
|
||||||
id := c.Get("X-Correlation-ID", uuid.New().String())
|
c.Set("X-Correlation-ID", id)
|
||||||
|
|
||||||
|
// Opt-in extra usage flag
|
||||||
extraUsage := c.Get("Extra-Usage", "") != ""
|
extraUsage := c.Get("Extra-Usage", "") != ""
|
||||||
|
|
||||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||||
if !ok || input.Model == "" {
|
if err != nil {
|
||||||
return fiber.ErrBadRequest
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
log.Debug().Msgf("`input`: %+v", input)
|
||||||
if !ok || config == nil {
|
|
||||||
return fiber.ErrBadRequest
|
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.ResponseFormatMap != nil {
|
if config.ResponseFormatMap != nil {
|
||||||
@@ -118,7 +122,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
|
|||||||
|
|
||||||
responses := make(chan schema.OpenAIResponse)
|
responses := make(chan schema.OpenAIResponse)
|
||||||
|
|
||||||
go process(id, predInput, input, config, ml, responses, extraUsage)
|
go process(predInput, input, config, ml, responses, extraUsage)
|
||||||
|
|
||||||
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
||||||
|
|
||||||
|
|||||||
@@ -2,17 +2,16 @@ package openai
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
|
||||||
"github.com/mudler/LocalAI/pkg/templates"
|
"github.com/mudler/LocalAI/pkg/templates"
|
||||||
|
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
@@ -26,21 +25,20 @@ import (
|
|||||||
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
|
||||||
if !ok || input.Model == "" {
|
|
||||||
return fiber.ErrBadRequest
|
|
||||||
}
|
|
||||||
// Opt-in extra usage flag
|
// Opt-in extra usage flag
|
||||||
extraUsage := c.Get("Extra-Usage", "") != ""
|
extraUsage := c.Get("Extra-Usage", "") != ""
|
||||||
|
|
||||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||||
if !ok || config == nil {
|
if err != nil {
|
||||||
return fiber.ErrBadRequest
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Edit Endpoint Input : %+v", input)
|
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||||
log.Debug().Msgf("Edit Endpoint Config: %+v", *config)
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||||
|
|
||||||
var result []schema.Choice
|
var result []schema.Choice
|
||||||
totalTokenUsage := backend.TokenUsage{}
|
totalTokenUsage := backend.TokenUsage{}
|
||||||
|
|||||||
@@ -2,11 +2,11 @@ package openai
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
@@ -23,14 +23,14 @@ import (
|
|||||||
// @Router /v1/embeddings [post]
|
// @Router /v1/embeddings [post]
|
||||||
func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
model, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||||
if !ok || input.Model == "" {
|
if err != nil {
|
||||||
return fiber.ErrBadRequest
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
config, input, err := mergeRequestWithConfig(model, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||||
if !ok || config == nil {
|
if err != nil {
|
||||||
return fiber.ErrBadRequest
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||||
|
|||||||
@@ -15,7 +15,6 @@ import (
|
|||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
@@ -67,23 +66,25 @@ func downloadFile(url string) (string, error) {
|
|||||||
// @Router /v1/images/generations [post]
|
// @Router /v1/images/generations [post]
|
||||||
func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
m, input, err := readRequest(c, cl, ml, appConfig, false)
|
||||||
if !ok || input.Model == "" {
|
if err != nil {
|
||||||
log.Error().Msg("Image Endpoint - Invalid Input")
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
return fiber.ErrBadRequest
|
|
||||||
}
|
}
|
||||||
|
|
||||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
if m == "" {
|
||||||
if !ok || config == nil {
|
m = "stablediffusion"
|
||||||
log.Error().Msg("Image Endpoint - Invalid Config")
|
}
|
||||||
return fiber.ErrBadRequest
|
log.Debug().Msgf("Loading model: %+v", m)
|
||||||
|
|
||||||
|
config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, 0, 0, false)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
src := ""
|
src := ""
|
||||||
if input.File != "" {
|
if input.File != "" {
|
||||||
|
|
||||||
fileData := []byte{}
|
fileData := []byte{}
|
||||||
var err error
|
|
||||||
// check if input.File is an URL, if so download it and save it
|
// check if input.File is an URL, if so download it and save it
|
||||||
// to a temporary file
|
// to a temporary file
|
||||||
if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") {
|
if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") {
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ func ComputeChoices(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// get the model function to call for the result
|
// get the model function to call for the result
|
||||||
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, config, o, tokenCallback)
|
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, *config, o, tokenCallback)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return result, backend.TokenUsage{}, err
|
return result, backend.TokenUsage{}, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,22 +1,20 @@
|
|||||||
package middleware
|
package openai
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
|
||||||
|
|
||||||
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/core/services"
|
|
||||||
"github.com/mudler/LocalAI/pkg/functions"
|
"github.com/mudler/LocalAI/pkg/functions"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
"github.com/mudler/LocalAI/pkg/templates"
|
"github.com/mudler/LocalAI/pkg/templates"
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
"github.com/mudler/LocalAI/pkg/utils"
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -25,166 +23,33 @@ type correlationIDKeyType string
|
|||||||
// CorrelationIDKey to track request across process boundary
|
// CorrelationIDKey to track request across process boundary
|
||||||
const CorrelationIDKey correlationIDKeyType = "correlationID"
|
const CorrelationIDKey correlationIDKeyType = "correlationID"
|
||||||
|
|
||||||
type RequestExtractor struct {
|
func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
|
||||||
backendConfigLoader *config.BackendConfigLoader
|
input := new(schema.OpenAIRequest)
|
||||||
modelLoader *model.ModelLoader
|
|
||||||
applicationConfig *config.ApplicationConfig
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewRequestExtractor(backendConfigLoader *config.BackendConfigLoader, modelLoader *model.ModelLoader, applicationConfig *config.ApplicationConfig) *RequestExtractor {
|
// Get input data from the request body
|
||||||
return &RequestExtractor{
|
if err := c.BodyParser(input); err != nil {
|
||||||
backendConfigLoader: backendConfigLoader,
|
return "", nil, fmt.Errorf("failed parsing request body: %w", err)
|
||||||
modelLoader: modelLoader,
|
|
||||||
applicationConfig: applicationConfig,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const CONTEXT_LOCALS_KEY_MODEL_NAME = "MODEL_NAME"
|
|
||||||
const CONTEXT_LOCALS_KEY_LOCALAI_REQUEST = "LOCALAI_REQUEST"
|
|
||||||
const CONTEXT_LOCALS_KEY_MODEL_CONFIG = "MODEL_CONFIG"
|
|
||||||
|
|
||||||
// TODO: Refactor to not return error if unchanged
|
|
||||||
func (re *RequestExtractor) setModelNameFromRequest(ctx *fiber.Ctx) {
|
|
||||||
model, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
|
||||||
if ok && model != "" {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
model = ctx.Params("model")
|
|
||||||
|
|
||||||
if (model == "") && ctx.Query("model") != "" {
|
|
||||||
model = ctx.Query("model")
|
|
||||||
}
|
|
||||||
|
|
||||||
if model == "" {
|
|
||||||
// Set model from bearer token, if available
|
|
||||||
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // "Bearer " => "Bear" to please go-staticcheck. It looks dumb but we might as well take free performance on something called for nearly every request.
|
|
||||||
if bearer != "" {
|
|
||||||
exists, err := services.CheckIfModelExists(re.backendConfigLoader, re.modelLoader, bearer, services.ALWAYS_INCLUDE)
|
|
||||||
if err == nil && exists {
|
|
||||||
model = bearer
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, model)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (re *RequestExtractor) BuildConstantDefaultModelNameMiddleware(defaultModelName string) fiber.Handler {
|
|
||||||
return func(ctx *fiber.Ctx) error {
|
|
||||||
re.setModelNameFromRequest(ctx)
|
|
||||||
localModelName, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
|
||||||
if !ok || localModelName == "" {
|
|
||||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, defaultModelName)
|
|
||||||
log.Debug().Str("defaultModelName", defaultModelName).Msg("context local model name not found, setting to default")
|
|
||||||
}
|
|
||||||
return ctx.Next()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (re *RequestExtractor) BuildFilteredFirstAvailableDefaultModel(filterFn config.BackendConfigFilterFn) fiber.Handler {
|
|
||||||
return func(ctx *fiber.Ctx) error {
|
|
||||||
re.setModelNameFromRequest(ctx)
|
|
||||||
localModelName := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
|
||||||
if localModelName != "" { // Don't overwrite existing values
|
|
||||||
return ctx.Next()
|
|
||||||
}
|
|
||||||
|
|
||||||
modelNames, err := services.ListModels(re.backendConfigLoader, re.modelLoader, filterFn, services.SKIP_IF_CONFIGURED)
|
|
||||||
if err != nil {
|
|
||||||
log.Error().Err(err).Msg("non-fatal error calling ListModels during SetDefaultModelNameToFirstAvailable()")
|
|
||||||
return ctx.Next()
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(modelNames) == 0 {
|
|
||||||
log.Warn().Msg("SetDefaultModelNameToFirstAvailable used with no matching models installed")
|
|
||||||
// This is non-fatal - making it so was breaking the case of direct installation of raw models
|
|
||||||
// return errors.New("this endpoint requires at least one model to be installed")
|
|
||||||
return ctx.Next()
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME, modelNames[0])
|
|
||||||
log.Debug().Str("first model name", modelNames[0]).Msg("context local model name not found, setting to the first model")
|
|
||||||
return ctx.Next()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: If context and cancel above belong on all methods, move that part of above into here!
|
|
||||||
// Otherwise, it's in its own method below for now
|
|
||||||
func (re *RequestExtractor) SetModelAndConfig(initializer func() schema.LocalAIRequest) fiber.Handler {
|
|
||||||
return func(ctx *fiber.Ctx) error {
|
|
||||||
input := initializer()
|
|
||||||
if input == nil {
|
|
||||||
return fmt.Errorf("unable to initialize body")
|
|
||||||
}
|
|
||||||
if err := ctx.BodyParser(input); err != nil {
|
|
||||||
return fmt.Errorf("failed parsing request body: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// If this request doesn't have an associated model name, fetch it from earlier in the middleware chain
|
|
||||||
if input.ModelName(nil) == "" {
|
|
||||||
localModelName, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_NAME).(string)
|
|
||||||
if ok && localModelName != "" {
|
|
||||||
log.Debug().Str("context localModelName", localModelName).Msg("overriding empty model name in request body with value found earlier in middleware chain")
|
|
||||||
input.ModelName(&localModelName)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cfg, err := re.backendConfigLoader.LoadBackendConfigFileByNameDefaultOptions(input.ModelName(nil), re.applicationConfig)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Err(err)
|
|
||||||
log.Warn().Msgf("Model Configuration File not found for %q", input.ModelName(nil))
|
|
||||||
} else if cfg.Model == "" && input.ModelName(nil) != "" {
|
|
||||||
log.Debug().Str("input.ModelName", input.ModelName(nil)).Msg("config does not include model, using input")
|
|
||||||
cfg.Model = input.ModelName(nil)
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx.Locals(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST, input)
|
|
||||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_CONFIG, cfg)
|
|
||||||
|
|
||||||
return ctx.Next()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (re *RequestExtractor) SetOpenAIRequest(ctx *fiber.Ctx) error {
|
|
||||||
input, ok := ctx.Locals(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
|
||||||
if !ok || input.Model == "" {
|
|
||||||
return fiber.ErrBadRequest
|
|
||||||
}
|
|
||||||
|
|
||||||
cfg, ok := ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
|
||||||
if !ok || cfg == nil {
|
|
||||||
return fiber.ErrBadRequest
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
received, _ := json.Marshal(input)
|
||||||
// Extract or generate the correlation ID
|
// Extract or generate the correlation ID
|
||||||
correlationID := ctx.Get("X-Correlation-ID", uuid.New().String())
|
correlationID := c.Get("X-Correlation-ID", uuid.New().String())
|
||||||
ctx.Set("X-Correlation-ID", correlationID)
|
|
||||||
|
|
||||||
c1, cancel := context.WithCancel(re.applicationConfig.Context)
|
ctx, cancel := context.WithCancel(o.Context)
|
||||||
// Add the correlation ID to the new context
|
// Add the correlation ID to the new context
|
||||||
ctxWithCorrelationID := context.WithValue(c1, CorrelationIDKey, correlationID)
|
ctxWithCorrelationID := context.WithValue(ctx, CorrelationIDKey, correlationID)
|
||||||
|
|
||||||
input.Context = ctxWithCorrelationID
|
input.Context = ctxWithCorrelationID
|
||||||
input.Cancel = cancel
|
input.Cancel = cancel
|
||||||
|
|
||||||
err := mergeOpenAIRequestAndBackendConfig(cfg, input)
|
log.Debug().Msgf("Request received: %s", string(received))
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if cfg.Model == "" {
|
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, firstModel)
|
||||||
log.Debug().Str("input.Model", input.Model).Msg("replacing empty cfg.Model with input value")
|
|
||||||
cfg.Model = input.Model
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx.Locals(CONTEXT_LOCALS_KEY_LOCALAI_REQUEST, input)
|
return modelFile, input, err
|
||||||
ctx.Locals(CONTEXT_LOCALS_KEY_MODEL_CONFIG, cfg)
|
|
||||||
|
|
||||||
return ctx.Next()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *schema.OpenAIRequest) error {
|
func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) {
|
||||||
if input.Echo {
|
if input.Echo {
|
||||||
config.Echo = input.Echo
|
config.Echo = input.Echo
|
||||||
}
|
}
|
||||||
@@ -384,8 +249,6 @@ func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *sch
|
|||||||
config.TypicalP = input.TypicalP
|
config.TypicalP = input.TypicalP
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Str("input.Input", fmt.Sprintf("%+v", input.Input))
|
|
||||||
|
|
||||||
switch inputs := input.Input.(type) {
|
switch inputs := input.Input.(type) {
|
||||||
case string:
|
case string:
|
||||||
if inputs != "" {
|
if inputs != "" {
|
||||||
@@ -442,9 +305,22 @@ func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *sch
|
|||||||
config.Step = q
|
config.Step = q
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if config.Validate() {
|
|
||||||
return nil
|
func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {
|
||||||
}
|
cfg, err := cm.LoadBackendConfigFileByName(modelFile, loader.ModelPath,
|
||||||
return fmt.Errorf("unable to validate configuration after merging")
|
config.LoadOptionDebug(debug),
|
||||||
|
config.LoadOptionThreads(threads),
|
||||||
|
config.LoadOptionContextSize(ctx),
|
||||||
|
config.LoadOptionF16(f16),
|
||||||
|
)
|
||||||
|
|
||||||
|
// Set the parameters for the language model prediction
|
||||||
|
updateRequestConfig(cfg, input)
|
||||||
|
|
||||||
|
if !cfg.Validate() {
|
||||||
|
return nil, nil, fmt.Errorf("failed to validate config")
|
||||||
|
}
|
||||||
|
|
||||||
|
return cfg, input, err
|
||||||
}
|
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
package openai
|
package openai
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
@@ -9,8 +10,6 @@ import (
|
|||||||
|
|
||||||
"github.com/mudler/LocalAI/core/backend"
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
|
||||||
model "github.com/mudler/LocalAI/pkg/model"
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
@@ -26,16 +25,15 @@ import (
|
|||||||
// @Router /v1/audio/transcriptions [post]
|
// @Router /v1/audio/transcriptions [post]
|
||||||
func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
input, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
|
m, input, err := readRequest(c, cl, ml, appConfig, false)
|
||||||
if !ok || input.Model == "" {
|
if err != nil {
|
||||||
return fiber.ErrBadRequest
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
config, ok := c.Locals(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.BackendConfig)
|
config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
||||||
if !ok || config == nil {
|
if err != nil {
|
||||||
return fiber.ErrBadRequest
|
return fmt.Errorf("failed reading parameters from request: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// retrieve the file data from the request
|
// retrieve the file data from the request
|
||||||
file, err := c.FormFile("file")
|
file, err := c.FormFile("file")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -4,26 +4,17 @@ import (
|
|||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/endpoints/elevenlabs"
|
"github.com/mudler/LocalAI/core/http/endpoints/elevenlabs"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
func RegisterElevenLabsRoutes(app *fiber.App,
|
func RegisterElevenLabsRoutes(app *fiber.App,
|
||||||
re *middleware.RequestExtractor,
|
|
||||||
cl *config.BackendConfigLoader,
|
cl *config.BackendConfigLoader,
|
||||||
ml *model.ModelLoader,
|
ml *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig) {
|
appConfig *config.ApplicationConfig) {
|
||||||
|
|
||||||
// Elevenlabs
|
// Elevenlabs
|
||||||
app.Post("/v1/text-to-speech/:voice-id",
|
app.Post("/v1/text-to-speech/:voice-id", elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
||||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
|
|
||||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.ElevenLabsTTSRequest) }),
|
|
||||||
elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
|
||||||
|
|
||||||
app.Post("/v1/sound-generation",
|
app.Post("/v1/sound-generation", elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
|
||||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_SOUND_GENERATION)),
|
|
||||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.ElevenLabsSoundGenerationRequest) }),
|
|
||||||
elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,22 +3,16 @@ package routes
|
|||||||
import (
|
import (
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/endpoints/jina"
|
"github.com/mudler/LocalAI/core/http/endpoints/jina"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
func RegisterJINARoutes(app *fiber.App,
|
func RegisterJINARoutes(app *fiber.App,
|
||||||
re *middleware.RequestExtractor,
|
|
||||||
cl *config.BackendConfigLoader,
|
cl *config.BackendConfigLoader,
|
||||||
ml *model.ModelLoader,
|
ml *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig) {
|
appConfig *config.ApplicationConfig) {
|
||||||
|
|
||||||
// POST endpoint to mimic the reranking
|
// POST endpoint to mimic the reranking
|
||||||
app.Post("/v1/rerank",
|
app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig))
|
||||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_RERANK)),
|
|
||||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.JINARerankRequest) }),
|
|
||||||
jina.JINARerankEndpoint(cl, ml, appConfig))
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,16 +5,13 @@ import (
|
|||||||
"github.com/gofiber/swagger"
|
"github.com/gofiber/swagger"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
|
||||||
"github.com/mudler/LocalAI/core/p2p"
|
"github.com/mudler/LocalAI/core/p2p"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
|
||||||
"github.com/mudler/LocalAI/core/services"
|
"github.com/mudler/LocalAI/core/services"
|
||||||
"github.com/mudler/LocalAI/internal"
|
"github.com/mudler/LocalAI/internal"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
func RegisterLocalAIRoutes(router *fiber.App,
|
func RegisterLocalAIRoutes(router *fiber.App,
|
||||||
requestExtractor *middleware.RequestExtractor,
|
|
||||||
cl *config.BackendConfigLoader,
|
cl *config.BackendConfigLoader,
|
||||||
ml *model.ModelLoader,
|
ml *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig,
|
||||||
@@ -36,18 +33,8 @@ func RegisterLocalAIRoutes(router *fiber.App,
|
|||||||
router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
|
router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
|
||||||
}
|
}
|
||||||
|
|
||||||
router.Post("/tts",
|
router.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
|
||||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
|
router.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))
|
||||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TTSRequest) }),
|
|
||||||
localai.TTSEndpoint(cl, ml, appConfig))
|
|
||||||
|
|
||||||
vadChain := []fiber.Handler{
|
|
||||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_VAD)),
|
|
||||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.VADRequest) }),
|
|
||||||
localai.VADEndpoint(cl, ml, appConfig),
|
|
||||||
}
|
|
||||||
router.Post("/vad", vadChain...)
|
|
||||||
router.Post("/v1/vad", vadChain...)
|
|
||||||
|
|
||||||
// Stores
|
// Stores
|
||||||
sl := model.NewModelLoader("")
|
sl := model.NewModelLoader("")
|
||||||
@@ -60,14 +47,10 @@ func RegisterLocalAIRoutes(router *fiber.App,
|
|||||||
router.Get("/metrics", localai.LocalAIMetricsEndpoint())
|
router.Get("/metrics", localai.LocalAIMetricsEndpoint())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Backend Statistics Module
|
// Experimental Backend Statistics Module
|
||||||
// TODO: Should these use standard middlewares? Refactor later, they are extremely simple.
|
|
||||||
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
|
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
|
||||||
router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
|
router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
|
||||||
router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
|
router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
|
||||||
// The v1/* urls are exactly the same as above - makes local e2e testing easier if they are registered.
|
|
||||||
router.Get("/v1/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
|
|
||||||
router.Post("/v1/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
|
|
||||||
|
|
||||||
// p2p
|
// p2p
|
||||||
if p2p.IsP2PEnabled() {
|
if p2p.IsP2PEnabled() {
|
||||||
@@ -84,9 +67,6 @@ func RegisterLocalAIRoutes(router *fiber.App,
|
|||||||
router.Get("/system", localai.SystemInformations(ml, appConfig))
|
router.Get("/system", localai.SystemInformations(ml, appConfig))
|
||||||
|
|
||||||
// misc
|
// misc
|
||||||
router.Post("/v1/tokenize",
|
router.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
|
||||||
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TOKENIZE)),
|
|
||||||
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }),
|
|
||||||
localai.TokenizeEndpoint(cl, ml, appConfig))
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,50 +3,51 @@ package routes
|
|||||||
import (
|
import (
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/mudler/LocalAI/core/application"
|
"github.com/mudler/LocalAI/core/application"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
|
||||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||||
"github.com/mudler/LocalAI/core/http/endpoints/openai"
|
"github.com/mudler/LocalAI/core/http/endpoints/openai"
|
||||||
"github.com/mudler/LocalAI/core/http/middleware"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func RegisterOpenAIRoutes(app *fiber.App,
|
func RegisterOpenAIRoutes(app *fiber.App,
|
||||||
re *middleware.RequestExtractor,
|
|
||||||
application *application.Application) {
|
application *application.Application) {
|
||||||
// openAI compatible API endpoint
|
// openAI compatible API endpoint
|
||||||
|
|
||||||
// chat
|
// chat
|
||||||
chatChain := []fiber.Handler{
|
app.Post("/v1/chat/completions",
|
||||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
|
openai.ChatEndpoint(
|
||||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
application.BackendLoader(),
|
||||||
re.SetOpenAIRequest,
|
application.ModelLoader(),
|
||||||
openai.ChatEndpoint(application.BackendLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()),
|
application.TemplatesEvaluator(),
|
||||||
}
|
application.ApplicationConfig(),
|
||||||
app.Post("/v1/chat/completions", chatChain...)
|
),
|
||||||
app.Post("/chat/completions", chatChain...)
|
)
|
||||||
|
|
||||||
|
app.Post("/chat/completions",
|
||||||
|
openai.ChatEndpoint(
|
||||||
|
application.BackendLoader(),
|
||||||
|
application.ModelLoader(),
|
||||||
|
application.TemplatesEvaluator(),
|
||||||
|
application.ApplicationConfig(),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
// edit
|
// edit
|
||||||
editChain := []fiber.Handler{
|
app.Post("/v1/edits",
|
||||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EDIT)),
|
openai.EditEndpoint(
|
||||||
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
|
application.BackendLoader(),
|
||||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
application.ModelLoader(),
|
||||||
re.SetOpenAIRequest,
|
application.TemplatesEvaluator(),
|
||||||
openai.EditEndpoint(application.BackendLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()),
|
application.ApplicationConfig(),
|
||||||
}
|
),
|
||||||
app.Post("/v1/edits", editChain...)
|
)
|
||||||
app.Post("/edits", editChain...)
|
|
||||||
|
|
||||||
// completion
|
app.Post("/edits",
|
||||||
completionChain := []fiber.Handler{
|
openai.EditEndpoint(
|
||||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_COMPLETION)),
|
application.BackendLoader(),
|
||||||
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
|
application.ModelLoader(),
|
||||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
application.TemplatesEvaluator(),
|
||||||
re.SetOpenAIRequest,
|
application.ApplicationConfig(),
|
||||||
openai.CompletionEndpoint(application.BackendLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()),
|
),
|
||||||
}
|
)
|
||||||
app.Post("/v1/completions", completionChain...)
|
|
||||||
app.Post("/completions", completionChain...)
|
|
||||||
app.Post("/v1/engines/:model/completions", completionChain...)
|
|
||||||
|
|
||||||
// assistant
|
// assistant
|
||||||
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||||
@@ -80,37 +81,45 @@ func RegisterOpenAIRoutes(app *fiber.App,
|
|||||||
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||||
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
|
||||||
|
|
||||||
// embeddings
|
// completion
|
||||||
embeddingChain := []fiber.Handler{
|
app.Post("/v1/completions",
|
||||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EMBEDDINGS)),
|
openai.CompletionEndpoint(
|
||||||
re.BuildConstantDefaultModelNameMiddleware("gpt-4o"),
|
application.BackendLoader(),
|
||||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
application.ModelLoader(),
|
||||||
re.SetOpenAIRequest,
|
application.TemplatesEvaluator(),
|
||||||
openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()),
|
application.ApplicationConfig(),
|
||||||
}
|
),
|
||||||
app.Post("/v1/embeddings", embeddingChain...)
|
|
||||||
app.Post("/embeddings", embeddingChain...)
|
|
||||||
app.Post("/v1/engines/:model/embeddings", embeddingChain...)
|
|
||||||
|
|
||||||
// audio
|
|
||||||
app.Post("/v1/audio/transcriptions",
|
|
||||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT)),
|
|
||||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
|
||||||
re.SetOpenAIRequest,
|
|
||||||
openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
app.Post("/v1/audio/speech",
|
app.Post("/completions",
|
||||||
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
|
openai.CompletionEndpoint(
|
||||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TTSRequest) }),
|
application.BackendLoader(),
|
||||||
localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
application.ModelLoader(),
|
||||||
|
application.TemplatesEvaluator(),
|
||||||
|
application.ApplicationConfig(),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
app.Post("/v1/engines/:model/completions",
|
||||||
|
openai.CompletionEndpoint(
|
||||||
|
application.BackendLoader(),
|
||||||
|
application.ModelLoader(),
|
||||||
|
application.TemplatesEvaluator(),
|
||||||
|
application.ApplicationConfig(),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
// embeddings
|
||||||
|
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||||
|
app.Post("/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||||
|
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||||
|
|
||||||
|
// audio
|
||||||
|
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||||
|
app.Post("/v1/audio/speech", localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||||
|
|
||||||
// images
|
// images
|
||||||
app.Post("/v1/images/generations",
|
app.Post("/v1/images/generations", openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||||
re.BuildConstantDefaultModelNameMiddleware("stablediffusion"),
|
|
||||||
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
|
|
||||||
re.SetOpenAIRequest,
|
|
||||||
openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
|
||||||
|
|
||||||
if application.ApplicationConfig().ImageDir != "" {
|
if application.ApplicationConfig().ImageDir != "" {
|
||||||
app.Static("/generated-images", application.ApplicationConfig().ImageDir)
|
app.Static("/generated-images", application.ApplicationConfig().ImageDir)
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
<footer class="text-center py-8">
|
<footer class="text-center py-8">
|
||||||
LocalAI Version {{.Version}}<br>
|
LocalAI Version {{.Version}}<br>
|
||||||
<a href='https://github.com/mudler/LocalAI' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2025 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
|
<a href='https://github.com/mudler/LocalAI' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2024 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
|
||||||
</footer>
|
</footer>
|
||||||
<script src="static/assets/tw-elements.js"></script>
|
<script src="static/assets/tw-elements.js"></script>
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user