Compare commits

...

98 Commits

Author SHA1 Message Date
Ettore Di Giacinto
a8db308d7b chore(deps): bump llama.cpp to '92bb442ad999a0d52df0af2730cd861012e8ac5c'
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-11-12 22:20:15 +01:00
Ettore Di Giacinto
b1d1f2a37d chore(importers): small logic enhancements (#7262)
* chore(import): import mmproj files to specific folder

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Slightly enhance logics

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-11-12 22:08:08 +01:00
Ettore Di Giacinto
3728552e94 feat: import models via URI (#7245)
* feat: initial hook to install elements directly

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* WIP: ui changes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Move HF api client to pkg

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add simple importer for gguf files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add opcache

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* wire importers to CLI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add omitempty to config fields

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add MLX importer

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Small refactors to star to use HF for discovery

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Common preferences

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add support to bare HF repos

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(importer/llama.cpp): add support for mmproj files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* add mmproj quants to common preferences

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix vlm usage in tokenizer mode with llama.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-11-12 20:48:56 +01:00
Shohidul Bari
87d0020c10 chore: display file names in model preview (#7251)
* chore: add file names in the model response

Signed-off-by: shohidulbari <shohidulbari18@gmail.com>

* chore: show file names in model preview

Signed-off-by: shohidulbari <shohidulbari18@gmail.com>

* chore: fix prettier format

Signed-off-by: shohidulbari <shohidulbari18@gmail.com>

---------

Signed-off-by: shohidulbari <shohidulbari18@gmail.com>
2025-11-12 09:20:54 +01:00
LocalAI [bot]
a8eb537071 chore(model-gallery): ⬆️ update checksum (#7248)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-12 09:14:09 +01:00
Mikhail Khludnev
04fe0b0da8 fix(reranker): llama-cpp sort score desc, crop top_n (#7211)
Signed-off-by: Mikhail Khludnev <mkhl@apache.org>
2025-11-12 09:13:01 +01:00
LocalAI [bot]
fae93e5ba2 chore: ⬆️ Update ggml-org/llama.cpp to 7d019cff744b73084b15ca81ba9916f3efab1223 (#7247)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-11 21:31:01 +00:00
LocalAI [bot]
b606034243 chore(model gallery): 🤖 add 1 new models via gallery agent (#7237)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-11 09:19:19 +01:00
LocalAI [bot]
5f4663252d chore: ⬆️ Update ggml-org/llama.cpp to 13730c183b9e1a32c09bf132b5367697d6c55048 (#7232)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-11 00:03:01 +01:00
LocalAI [bot]
80bb7c5f67 chore: ⬆️ Update ggml-org/whisper.cpp to a1867e0dad0b21b35afa43fc815dae60c9a139d6 (#7231)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-10 22:51:49 +01:00
Ettore Di Giacinto
f6881ea023 fix(edit): propagate correctly opts when reloading (#7233)
fix(edit): propagate correctly ops when reloading

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-11-10 22:06:32 +01:00
dependabot[bot]
5651a19aa1 chore(deps): bump github.com/ebitengine/purego from 0.9.0 to 0.9.1 (#7229)
Bumps [github.com/ebitengine/purego](https://github.com/ebitengine/purego) from 0.9.0 to 0.9.1.
- [Release notes](https://github.com/ebitengine/purego/releases)
- [Commits](https://github.com/ebitengine/purego/compare/v0.9.0...v0.9.1)

---
updated-dependencies:
- dependency-name: github.com/ebitengine/purego
  dependency-version: 0.9.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 20:16:32 +00:00
dependabot[bot]
c834cdb826 chore(deps): bump github.com/testcontainers/testcontainers-go from 0.38.0 to 0.40.0 (#7230)
chore(deps): bump github.com/testcontainers/testcontainers-go

Bumps [github.com/testcontainers/testcontainers-go](https://github.com/testcontainers/testcontainers-go) from 0.38.0 to 0.40.0.
- [Release notes](https://github.com/testcontainers/testcontainers-go/releases)
- [Commits](https://github.com/testcontainers/testcontainers-go/compare/v0.38.0...v0.40.0)

---
updated-dependencies:
- dependency-name: github.com/testcontainers/testcontainers-go
  dependency-version: 0.40.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 19:40:57 +01:00
dependabot[bot]
fa2caef63d chore(deps): bump github.com/docker/docker from 28.5.1+incompatible to 28.5.2+incompatible (#7228)
chore(deps): bump github.com/docker/docker

Bumps [github.com/docker/docker](https://github.com/docker/docker) from 28.5.1+incompatible to 28.5.2+incompatible.
- [Release notes](https://github.com/docker/docker/releases)
- [Commits](https://github.com/docker/docker/compare/v28.5.1...v28.5.2)

---
updated-dependencies:
- dependency-name: github.com/docker/docker
  dependency-version: 28.5.2+incompatible
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 19:40:26 +01:00
dependabot[bot]
31abc799f9 chore(deps): bump github.com/jaypipes/ghw from 0.19.1 to 0.20.0 (#7227)
Bumps [github.com/jaypipes/ghw](https://github.com/jaypipes/ghw) from 0.19.1 to 0.20.0.
- [Release notes](https://github.com/jaypipes/ghw/releases)
- [Commits](https://github.com/jaypipes/ghw/compare/v0.19.1...v0.20.0)

---
updated-dependencies:
- dependency-name: github.com/jaypipes/ghw
  dependency-version: 0.20.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 19:40:06 +01:00
dependabot[bot]
2368395a0c chore(deps): bump github.com/mudler/cogito from 0.5.0 to 0.5.1 (#7226)
Bumps [github.com/mudler/cogito](https://github.com/mudler/cogito) from 0.5.0 to 0.5.1.
- [Release notes](https://github.com/mudler/cogito/releases)
- [Commits](https://github.com/mudler/cogito/compare/v0.5.0...v0.5.1)

---
updated-dependencies:
- dependency-name: github.com/mudler/cogito
  dependency-version: 0.5.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 19:39:45 +01:00
dependabot[bot]
bf77c11b65 chore(deps): bump appleboy/ssh-action from 1.2.2 to 1.2.3 (#7224)
Bumps [appleboy/ssh-action](https://github.com/appleboy/ssh-action) from 1.2.2 to 1.2.3.
- [Release notes](https://github.com/appleboy/ssh-action/releases)
- [Changelog](https://github.com/appleboy/ssh-action/blob/master/.goreleaser.yaml)
- [Commits](https://github.com/appleboy/ssh-action/compare/v1.2.2...v1.2.3)

---
updated-dependencies:
- dependency-name: appleboy/ssh-action
  dependency-version: 1.2.3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-10 19:34:13 +01:00
Ettore Di Giacinto
8876073f5c feat(ui): chat stats, small visual enhancements (#7223)
* feat(ui): show stats in chat, improve style

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Markdown, small improvements

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Display token/sec into stats

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Minor enhancement

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Small fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Revert "Fixups"

This reverts commit ab1b3d6da9.

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-11-10 18:12:07 +01:00
Ettore Di Giacinto
8432915cb8 feat(ui): add wizard when p2p is disabled (#7218)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-11-10 10:43:54 +01:00
LocalAI [bot]
9ddb94b507 chore(model gallery): 🤖 add 1 new models via gallery agent (#7216)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-10 09:38:52 +01:00
LocalAI [bot]
e42f0f7e79 chore: ⬆️ Update ggml-org/llama.cpp to b8595b16e69e3029e06be3b8f6635f9812b2bc3f (#7210)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-09 23:56:27 +01:00
Copilot
34bc1bda1e fix(api): SSE streaming format to comply with specification (#7182)
* Initial plan

* Fix SSE streaming format to comply with specification

- Replace json.Encoder with json.Marshal for explicit formatting
- Use explicit \n\n for all SSE messages (instead of relying on implicit newlines)
- Change %v to %s format specifier for proper string formatting
- Fix error message streaming to include proper SSE format
- Ensure consistency between chat.go and completion.go endpoints

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Add proper error handling for JSON marshal failures in streaming

- Handle json.Marshal errors explicitly in error response paths
- Add fallback simple error message if marshal fails
- Prevents sending 'data: <nil>' on marshal failures
- Addresses code review feedback

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Fix SSE streaming format to comply with specification

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Fix finish_reason field to use pointer for proper null handling

- Change FinishReason from string to *string in Choice schema
- Streaming chunks now omit finish_reason (null) instead of empty string
- Final chunks properly set finish_reason to "stop", "tool_calls", etc.
- Remove empty content from initial streaming chunks (only send role)
- Final streaming chunk sends empty delta with finish_reason
- Addresses OpenAI API compliance issues causing client failures

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Improve code consistency for string pointer creation

- Use consistent pattern: declare variable then take address
- Remove inline anonymous function for better readability
- Addresses code review feedback

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Move common finish reasons to constants

- Create constants.go with FinishReasonStop, FinishReasonToolCalls, FinishReasonFunctionCall
- Replace all string literals with constants in chat.go, completion.go, realtime.go
- Improves code maintainability and prevents typos

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

* Make it build

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix finish_reason to always be present with null or string value

- Remove omitempty from FinishReason field in Choice struct
- Explicitly set FinishReason to nil for all streaming chunks
- Ensures finish_reason appears as null in JSON for streaming chunks
- Final chunks still properly set finish_reason to "stop", "tool_calls", etc.
- Complies with OpenAI API specification example

Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2025-11-09 22:00:27 +01:00
Mikhail Khludnev
01cd58a739 fix(reranker): support omitting top_n (#7199)
* fix(reranker): support omitting top_n

Signed-off-by: Mikhail Khludnev <mkhl@apache.org>

* fix(reranker): support omitting top_n

Signed-off-by: Mikhail Khludnev <mkhl@apache.org>

* pass 0 explicitly 

Signed-off-by: Mikhail Khludnev <mkhludnev@users.noreply.github.com>

---------

Signed-off-by: Mikhail Khludnev <mkhl@apache.org>
Signed-off-by: Mikhail Khludnev <mkhludnev@users.noreply.github.com>
2025-11-09 18:40:32 +01:00
Ettore Di Giacinto
679d43c2f5 feat: respect context and add request cancellation (#7187)
* feat: respect context

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* workaround fasthttp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(ui): allow to abort call

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Refactor

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore: improving error

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Respect context also with MCP

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Tie to both contexts

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Make detection more robust

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-11-09 18:19:19 +01:00
LocalAI [bot]
4730b52461 chore(model gallery): 🤖 add 1 new models via gallery agent (#7205)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-09 08:40:40 +01:00
LocalAI [bot]
f678c6b0a9 chore: ⬆️ Update ggml-org/llama.cpp to 333f2595a3e0e4c0abf233f2f29ef1710acd134d (#7201)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-08 21:06:17 +00:00
Ettore Di Giacinto
2f2f9beee7 fix(chatterbox): pin numpy (#7198)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-11-08 16:52:22 +01:00
LocalAI [bot]
8ac7e28c12 chore: ⬆️ Update ggml-org/llama.cpp to 65156105069fa86a4a81b6cb0e8cb583f6420677 (#7184)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-08 09:07:44 +01:00
Ettore Di Giacinto
c5c3538115 chore(ci): change cron schedule to run every 3 hours
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-11-07 22:36:28 +01:00
Shohidul Bari
5ef16b5693 chore: use air to live reload in dev environment (#7186)
* chore: use air to live reload in dev environment

Signed-off-by: shohidulbari <shohidulbari18@gmail.com>

* chore: update contribuing with live reload option

Signed-off-by: shohidulbari <shohidulbari18@gmail.com>

---------

Signed-off-by: shohidulbari <shohidulbari18@gmail.com>
2025-11-07 21:53:44 +01:00
Ettore Di Giacinto
02cc8cbcaa feat(llama.cpp): consolidate options and respect tokenizer template when enabled (#7120)
* feat(llama.cpp): expose env vars as options for consistency

This allows to configure everything in the YAML file of the model rather
than have global configurations

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat(llama.cpp): respect usetokenizertemplate and use llama.cpp templating system to process messages

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* WIP

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Detect template exists if use tokenizer template is enabled

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Better recognization of chat

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixes to support tool calls while using templates from tokenizer

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Drop template guessing, fix passing tools to tokenizer

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Extract grammar and other options from chat template, add schema struct

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* WIP

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* WIP

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Automatically set use_jinja

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Cleanups, identify by default gguf models for chat

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Update docs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-11-07 21:23:50 +01:00
Ettore Di Giacinto
e5e86d0acb Enhance README with roadmap and getting started info
Updated README to include roadmap link and clarify getting started section.

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-11-07 16:50:47 +01:00
Ettore Di Giacinto
edd35d2b33 Fix formatting issues in README.md links
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-11-07 15:57:08 +01:00
LocalAI [bot]
e8cc29e364 chore(model gallery): 🤖 add 1 new models via gallery agent (#7162)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-07 08:40:29 +01:00
LocalAI [bot]
8f7c499f17 chore: ⬆️ Update ggml-org/llama.cpp to 7f09a680af6e0ef612de81018e1d19c19b8651e8 (#7156)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-07 08:38:56 +01:00
dependabot[bot]
ea446fde08 chore(deps): bump github.com/containerd/containerd from 1.7.28 to 1.7.29 in the go_modules group across 1 directory (#7149)
chore(deps): bump github.com/containerd/containerd

Bumps the go_modules group with 1 update in the / directory: [github.com/containerd/containerd](https://github.com/containerd/containerd).


Updates `github.com/containerd/containerd` from 1.7.28 to 1.7.29
- [Release notes](https://github.com/containerd/containerd/releases)
- [Changelog](https://github.com/containerd/containerd/blob/main/RELEASES.md)
- [Commits](https://github.com/containerd/containerd/compare/v1.7.28...v1.7.29)

---
updated-dependencies:
- dependency-name: github.com/containerd/containerd
  dependency-version: 1.7.29
  dependency-type: direct:production
  dependency-group: go_modules
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-06 16:40:49 +00:00
Mikhail Khludnev
122e4c7094 fix(reranker): reproduce ignoring top_n (#7025)
* fix(reranker): reproduce ignoring top_n

Signed-off-by: Mikhail Khludnev <mkhl@apache.org>

* fix(reranker): ignoring top_n

Signed-off-by: Mikhail Khludnev <mkhl@apache.org>

---------

Signed-off-by: Mikhail Khludnev <mkhl@apache.org>
2025-11-06 10:03:05 +00:00
LocalAI [bot]
2573102317 chore(model gallery): 🤖 add 1 new models via gallery agent (#7133)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-06 09:18:59 +01:00
Shohidul Bari
41b60fcfd3 chore: show success toast when system prompt is updated (#7131)
Signed-off-by: shohidulbari <shohidulbari18@gmail.com>
2025-11-06 09:05:57 +01:00
LocalAI [bot]
cb81869140 chore(model gallery): 🤖 add 1 new models via gallery agent (#7127)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-05 22:40:37 +01:00
LocalAI [bot]
db9957b94e chore: ⬆️ Update ggml-org/llama.cpp to a44d77126c911d105f7f800c17da21b2a5b112d1 (#7125)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-05 21:22:04 +00:00
LocalAI [bot]
98158881c2 chore: ⬆️ Update ggml-org/llama.cpp to ad51c0a720062a04349c779aae301ad65ca4c856 (#7098)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-04 21:19:58 +00:00
Ettore Di Giacinto
79247a5d17 Clarify note about DMGs not being signed by Apple
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-11-04 12:09:28 +01:00
Ettore Di Giacinto
46b7a4c5f2 Add macOS DMG download information
Added download link and note for macOS DMG installation.

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-11-04 12:09:07 +01:00
Ettore Di Giacinto
436e2d91d0 Enhance overview with Docker and installer details
Added Docker instructions and clarified one-liner installer for Linux.

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-11-04 12:08:03 +01:00
Ettore Di Giacinto
a86fdc4087 Update binaries.md with macOS download instructions
Added download instructions for macOS DMG file and updated command for Linux and macOS.

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-11-04 12:06:56 +01:00
Ettore Di Giacinto
c7ac6ca687 Update README with DMG signing note
Added a note about DMG signing and workarounds.

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-11-04 12:06:01 +01:00
LocalAI [bot]
7088327e8d chore(model gallery): 🤖 add 1 new models via gallery agent (#7077)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-04 09:27:33 +01:00
LocalAI [bot]
e2cb44ef37 chore: ⬆️ Update ggml-org/llama.cpp to c5023daf607c578d6344c628eb7da18ac3d92d32 (#7069)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-04 09:26:10 +01:00
dependabot[bot]
3a40b4129c chore(deps): bump github.com/mudler/edgevpn from 0.31.0 to 0.31.1 (#7055)
Bumps [github.com/mudler/edgevpn](https://github.com/mudler/edgevpn) from 0.31.0 to 0.31.1.
- [Release notes](https://github.com/mudler/edgevpn/releases)
- [Changelog](https://github.com/mudler/edgevpn/blob/master/.goreleaser.yml)
- [Commits](https://github.com/mudler/edgevpn/compare/v0.31.0...v0.31.1)

---
updated-dependencies:
- dependency-name: github.com/mudler/edgevpn
  dependency-version: 0.31.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-04 02:29:50 +00:00
dependabot[bot]
4ca8055f21 chore(deps): bump github.com/valyala/fasthttp from 1.55.0 to 1.68.0 (#7057)
Bumps [github.com/valyala/fasthttp](https://github.com/valyala/fasthttp) from 1.55.0 to 1.68.0.
- [Release notes](https://github.com/valyala/fasthttp/releases)
- [Commits](https://github.com/valyala/fasthttp/compare/v1.55.0...v1.68.0)

---
updated-dependencies:
- dependency-name: github.com/valyala/fasthttp
  dependency-version: 1.68.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-04 01:16:28 +00:00
dependabot[bot]
704786cc6d chore(deps): bump github.com/modelcontextprotocol/go-sdk from 1.0.0 to 1.1.0 (#7053)
chore(deps): bump github.com/modelcontextprotocol/go-sdk

Bumps [github.com/modelcontextprotocol/go-sdk](https://github.com/modelcontextprotocol/go-sdk) from 1.0.0 to 1.1.0.
- [Release notes](https://github.com/modelcontextprotocol/go-sdk/releases)
- [Commits](https://github.com/modelcontextprotocol/go-sdk/compare/v1.0.0...v1.1.0)

---
updated-dependencies:
- dependency-name: github.com/modelcontextprotocol/go-sdk
  dependency-version: 1.1.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-03 23:54:13 +00:00
dependabot[bot]
e5ce1fd9cc chore(deps): bump github.com/onsi/ginkgo/v2 from 2.26.0 to 2.27.2 (#7056)
Bumps [github.com/onsi/ginkgo/v2](https://github.com/onsi/ginkgo) from 2.26.0 to 2.27.2.
- [Release notes](https://github.com/onsi/ginkgo/releases)
- [Changelog](https://github.com/onsi/ginkgo/blob/master/CHANGELOG.md)
- [Commits](https://github.com/onsi/ginkgo/compare/v2.26.0...v2.27.2)

---
updated-dependencies:
- dependency-name: github.com/onsi/ginkgo/v2
  dependency-version: 2.27.2
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-03 21:49:06 +01:00
LocalAI [bot]
ea2037f141 chore(model gallery): 🤖 add 1 new models via gallery agent (#7068)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-03 21:48:04 +01:00
dependabot[bot]
567fa62330 chore(deps): bump github.com/mudler/cogito from 0.4.0 to 0.5.0 (#7054)
Bumps [github.com/mudler/cogito](https://github.com/mudler/cogito) from 0.4.0 to 0.5.0.
- [Release notes](https://github.com/mudler/cogito/releases)
- [Commits](https://github.com/mudler/cogito/compare/v0.4.0...v0.5.0)

---
updated-dependencies:
- dependency-name: github.com/mudler/cogito
  dependency-version: 0.5.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-03 19:50:22 +00:00
Ettore Di Giacinto
d424a27fa2 chore: display warning only when directory is present (#7050)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-11-03 18:56:47 +01:00
LocalAI [bot]
3ce9cb566d chore(model gallery): 🤖 add 1 new models via gallery agent (#7040)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-03 09:09:50 +01:00
Ettore Di Giacinto
ee7638a9b0 Update index.yaml with new file entry
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-11-03 08:47:47 +01:00
LocalAI [bot]
e57e50e441 chore(model gallery): 🤖 add 1 new models via gallery agent (#7039)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-03 08:47:16 +01:00
LocalAI [bot]
81880e7975 chore(model-gallery): ⬆️ update checksum (#7024)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-03 08:31:14 +01:00
LocalAI [bot]
2cad2c8591 chore: ⬆️ Update ggml-org/llama.cpp to cd5e3b57541ecc52421130742f4d89acbcf77cd4 (#7023)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-02 21:24:19 +00:00
LocalAI [bot]
b87b41ee45 chore(model gallery): 🤖 add 1 new models via gallery agent (#7017)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-02 17:34:11 +01:00
Ettore Di Giacinto
424acd66ad feat(llama.cpp): allow to set cache-ram and ctx_shift (#7009)
* feat(llama.cpp): allow to set cache-ram and ctx_shift

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Apply suggestion from @mudler

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-11-02 17:33:29 +01:00
LocalAI [bot]
3cd8234550 chore: ⬆️ Update ggml-org/whisper.cpp to 999a7e0cbf8484dc2cea1e9f855d6b39f34f7ae9 (#6997)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-02 10:39:11 +01:00
LocalAI [bot]
c70a0f05b8 chore(model gallery): 🤖 add 1 new models via gallery agent (#6989)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-02 09:48:12 +01:00
LocalAI [bot]
f85e2dd1b8 chore: ⬆️ Update ggml-org/llama.cpp to 2f68ce7cfd20e9e7098514bf730e5389b7bba908 (#6998)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-02 09:44:37 +01:00
LocalAI [bot]
e485bdf9ab docs: ⬆️ update docs version mudler/LocalAI (#6996)
⬆️ Update docs version mudler/LocalAI

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-01 21:08:08 +00:00
Ettore Di Giacinto
495c4ee694 Update index.yaml with new model entry
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-11-01 09:21:22 +01:00
LocalAI [bot]
161d1a0344 chore(model gallery): 🤖 add 1 new models via gallery agent (#6982)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-01 09:21:00 +01:00
LocalAI [bot]
b6d1def96f chore(model-gallery): ⬆️ update checksum (#6972)
⬆️ Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-11-01 09:13:42 +01:00
LocalAI [bot]
9ecfdc5938 chore: ⬆️ Update ggml-org/llama.cpp to 31c511a968348281e11d590446bb815048a1e912 (#6970)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-31 21:04:53 +00:00
Ettore Di Giacinto
c332ef5cce chore: fix linting issues
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-10-31 19:08:34 +01:00
Ettore Di Giacinto
6e7a8c6041 chore(model gallery): add qwen3-vl-2b-instruct (#6967)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-10-31 19:04:10 +01:00
Ettore Di Giacinto
43e707ec4f chore(model gallery): add qwen3-vl-2b-thinking (#6966)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-10-31 19:03:23 +01:00
Ettore Di Giacinto
fed3663a74 chore(model gallery): add qwen3-vl-4b-thinking (#6965)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-10-31 19:02:22 +01:00
Ettore Di Giacinto
5b72798db3 chore(model gallery): add qwen3-vl-32b-instruct (#6964)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-10-31 19:01:11 +01:00
Ettore Di Giacinto
d24d6d4e93 chore(model gallery): add qwen3-vl-4b-instruct (#6963)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-10-31 18:57:50 +01:00
Ettore Di Giacinto
50ee1fbe06 chore(model gallery): add qwen3-vl-30b-a3b-thinking (#6962)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-10-31 18:53:13 +01:00
Ettore Di Giacinto
19f3425ce0 chore(model gallery): add huihui-qwen3-vl-30b-a3b-instruct-abliterated (#6961)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-10-31 18:46:25 +01:00
Ettore Di Giacinto
a6ef245534 chore(model gallery): add qwen3-vl-30b-a3b-instruct (#6960)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-10-31 18:37:12 +01:00
LocalAI [bot]
88cb379c2d chore(model gallery): 🤖 add 1 new models via gallery agent (#6940)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-31 16:57:18 +01:00
LocalAI [bot]
0ddb2e8dcf chore: ⬆️ Update ggml-org/llama.cpp to 4146d6a1a6228711a487a1e3e9ddd120f8d027d7 (#6945)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-31 14:51:03 +00:00
Ettore Di Giacinto
91b9301bec Rename workflow from 'Bump dependencies' to 'Bump Documentation'
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-10-31 14:40:50 +01:00
Ettore Di Giacinto
fad5868f7b Rename job to 'bump-backends' in workflow
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-10-31 14:40:34 +01:00
LocalAI [bot]
1e5b9135df chore: ⬆️ Update ggml-org/llama.cpp to 16724b5b6836a2d4b8936a5824d2ff27c52b4517 (#6925)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-30 21:07:33 +00:00
LocalAI [bot]
36d19e23e0 chore(model gallery): 🤖 add 1 new models via gallery agent (#6921)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-30 18:58:08 +01:00
LocalAI [bot]
cba9d1aac0 chore(model gallery): 🤖 add 1 new models via gallery agent (#6919)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-30 17:26:18 +01:00
LocalAI [bot]
dd21a0d2f9 chore: ⬆️ Update ggml-org/llama.cpp to 3464bdac37027c5e9661621fc75ffcef3c19c6ef (#6896)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-30 14:17:58 +01:00
LocalAI [bot]
302a43b3ae chore(model gallery): 🤖 add 1 new models via gallery agent (#6911)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-30 09:54:24 +01:00
LocalAI [bot]
2955061b42 chore(model gallery): 🤖 add 1 new models via gallery agent (#6910)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-30 09:39:31 +01:00
LocalAI [bot]
84644ab693 chore(model gallery): 🤖 add 1 new models via gallery agent (#6908)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-30 09:20:23 +01:00
Ettore Di Giacinto
b8f40dde1e feat: do also text match (#6891)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-10-29 17:18:56 +01:00
LocalAI [bot]
a6c9789a54 chore(model gallery): 🤖 add 1 new models via gallery agent (#6884)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-29 10:56:57 +01:00
LocalAI [bot]
a48d9ce27c chore(model gallery): 🤖 add 1 new models via gallery agent (#6879)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-29 08:19:51 +01:00
LocalAI [bot]
fb825a2708 chore: ⬆️ Update ggml-org/llama.cpp to 851553ea6b24cb39fd5fd188b437d777cb411de8 (#6869)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-29 08:16:55 +01:00
LocalAI [bot]
5558dce449 chore: ⬆️ Update ggml-org/whisper.cpp to c62adfbd1ecdaea9e295c72d672992514a2d887c (#6868)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-28 21:12:05 +00:00
LocalAI [bot]
cf74a11e65 chore(model gallery): 🤖 add 1 new models via gallery agent (#6864)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-28 17:20:57 +01:00
LocalAI [bot]
86b5deec81 chore(model gallery): 🤖 add 1 new models via gallery agent (#6863)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-10-28 16:23:57 +01:00
95 changed files with 5313 additions and 2121 deletions

8
.air.toml Normal file
View File

@@ -0,0 +1,8 @@
# .air.toml
[build]
cmd = "make build"
bin = "./local-ai"
args_bin = [ "--debug" ]
include_ext = ["go", "html", "yaml", "toml", "json", "txt", "md"]
exclude_dir = ["pkg/grpc/proto"]
delay = 1000

View File

@@ -7,8 +7,8 @@ import (
"slices"
"strings"
"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
"github.com/mudler/cogito"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
cogito "github.com/mudler/cogito"
"github.com/mudler/cogito/structures"
"github.com/sashabaranov/go-openai/jsonschema"

View File

@@ -1,39 +0,0 @@
module github.com/go-skynet/LocalAI/.github/gallery-agent
go 1.24.1
require (
github.com/mudler/cogito v0.3.0
github.com/onsi/ginkgo/v2 v2.25.3
github.com/onsi/gomega v1.38.2
github.com/sashabaranov/go-openai v1.41.2
github.com/tmc/langchaingo v0.1.13
gopkg.in/yaml.v3 v3.0.1
)
require (
dario.cat/mergo v1.0.1 // indirect
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver/v3 v3.4.0 // indirect
github.com/Masterminds/sprig/v3 v3.3.0 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/jsonschema-go v0.3.0 // indirect
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/huandu/xstrings v1.5.0 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/modelcontextprotocol/go-sdk v1.0.0 // indirect
github.com/shopspring/decimal v1.4.0 // indirect
github.com/spf13/cast v1.7.0 // indirect
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
go.uber.org/automaxprocs v1.6.0 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/crypto v0.41.0 // indirect
golang.org/x/net v0.43.0 // indirect
golang.org/x/sys v0.35.0 // indirect
golang.org/x/text v0.28.0 // indirect
golang.org/x/tools v0.36.0 // indirect
)

View File

@@ -1,168 +0,0 @@
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4=
github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=
github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A=
github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=
github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5JxwYeqtMOEe0LDw=
github.com/docker/docker v28.2.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0omw=
github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/jsonschema-go v0.3.0 h1:6AH2TxVNtk3IlvkkhjrtbUc4S8AvO0Xii0DxIygDg+Q=
github.com/google/jsonschema-go v0.3.0/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE=
github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
github.com/moby/go-archive v0.1.0 h1:Kk/5rdW/g+H8NHdJW2gsXyZ7UnzvJNOy6VKJqueWdcQ=
github.com/moby/go-archive v0.1.0/go.mod h1:G9B+YoujNohJmrIYFBpSd54GTUB4lt9S+xVQvsJyFuo=
github.com/moby/patternmatcher v0.6.0 h1:GmP9lR19aU5GqSSFko+5pRqHi+Ohk1O69aFiKkVGiPk=
github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc=
github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU=
github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko=
github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs=
github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs=
github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g=
github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/modelcontextprotocol/go-sdk v1.0.0 h1:Z4MSjLi38bTgLrd/LjSmofqRqyBiVKRyQSJgw8q8V74=
github.com/modelcontextprotocol/go-sdk v1.0.0/go.mod h1:nYtYQroQ2KQiM0/SbyEPUWQ6xs4B95gJjEalc9AQyOs=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/mudler/cogito v0.3.0 h1:NbVAO3bLkK5oGSY0xq87jlz8C9OIsLW55s+8Hfzeu9s=
github.com/mudler/cogito v0.3.0/go.mod h1:abMwl+CUjCp87IufA2quZdZt0bbLaHHN79o17HbUKxU=
github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw=
github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE=
github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
github.com/sashabaranov/go-openai v1.41.2 h1:vfPRBZNMpnqu8ELsclWcAvF19lDNgh1t6TVfFFOPiSM=
github.com/sashabaranov/go-openai v1.41.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/shirou/gopsutil/v4 v4.25.5 h1:rtd9piuSMGeU8g1RMXjZs9y9luK5BwtnG7dZaQUJAsc=
github.com/shirou/gopsutil/v4 v4.25.5/go.mod h1:PfybzyydfZcN+JMMjkF6Zb8Mq1A/VcogFFg7hj50W9c=
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/testcontainers/testcontainers-go v0.38.0 h1:d7uEapLcv2P8AvH8ahLqDMMxda2W9gQN1nRbHS28HBw=
github.com/testcontainers/testcontainers-go v0.38.0/go.mod h1:C52c9MoHpWO+C4aqmgSU+hxlR5jlEayWtgYrb8Pzz1w=
github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
github.com/tmc/langchaingo v0.1.13 h1:rcpMWBIi2y3B90XxfE4Ao8dhCQPVDMaNPnN5cGB1CaA=
github.com/tmc/langchaingo v0.1.13/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 h1:Xs2Ncz0gNihqu9iosIZ5SkBbWo5T8JhhLJFMQL1qmLI=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0/go.mod h1:vy+2G/6NvVMpwGX/NyLqcC41fxepnuKHk16E6IZUcJc=
go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -9,7 +9,7 @@ import (
"strings"
"time"
"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
)
// ProcessedModelFile represents a processed model file with additional metadata

View File

@@ -3,9 +3,9 @@ package main
import (
"fmt"
"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
"github.com/sashabaranov/go-openai"
"github.com/tmc/langchaingo/jsonschema"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
openai "github.com/sashabaranov/go-openai"
jsonschema "github.com/sashabaranov/go-openai/jsonschema"
)
// Get repository README from HF
@@ -13,7 +13,7 @@ type HFReadmeTool struct {
client *hfapi.Client
}
func (s *HFReadmeTool) Run(args map[string]any) (string, error) {
func (s *HFReadmeTool) Execute(args map[string]any) (string, error) {
q, ok := args["repository"].(string)
if !ok {
return "", fmt.Errorf("no query")

View File

@@ -1,10 +1,10 @@
name: Bump dependencies
name: Bump Backend dependencies
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
bump:
bump-backends:
strategy:
fail-fast: false
matrix:

View File

@@ -1,10 +1,10 @@
name: Bump dependencies
name: Bump Documentation
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
bump:
bump-docs:
strategy:
fail-fast: false
matrix:

View File

@@ -33,7 +33,7 @@ jobs:
run: |
CGO_ENABLED=0 make build
- name: rm
uses: appleboy/ssh-action@v1.2.2
uses: appleboy/ssh-action@v1.2.3
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -53,7 +53,7 @@ jobs:
rm: true
target: ./local-ai
- name: restarting
uses: appleboy/ssh-action@v1.2.2
uses: appleboy/ssh-action@v1.2.3
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}

View File

@@ -2,7 +2,7 @@ name: Gallery Agent
on:
schedule:
- cron: '0 */1 * * *' # Run every 4 hours
- cron: '0 */3 * * *' # Run every 4 hours
workflow_dispatch:
inputs:
search_term:
@@ -39,11 +39,6 @@ jobs:
with:
go-version: '1.21'
- name: Build gallery agent
run: |
cd .github/gallery-agent
go mod download
go build -o gallery-agent .
- name: Run gallery agent
env:
@@ -56,9 +51,7 @@ jobs:
MAX_MODELS: ${{ github.event.inputs.max_models || '1' }}
run: |
export GALLERY_INDEX_PATH=$PWD/gallery/index.yaml
cd .github/gallery-agent
./gallery-agent
rm -rf gallery-agent
go run .github/gallery-agent
- name: Check for changes
id: check_changes

View File

@@ -30,6 +30,7 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
3. Install the required dependencies ( see https://localai.io/basics/build/#build-localai-locally )
4. Build LocalAI: `make build`
5. Run LocalAI: `./local-ai`
6. To Build and live reload: `make build-dev`
## Contributing
@@ -76,7 +77,7 @@ LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio
## Documentation
We are welcome the contribution of the documents, please open new PR or create a new issue. The documentation is available under `docs/` https://github.com/mudler/LocalAI/tree/master/docs
## Community and Communication
- You can reach out via the Github issue tracker.

View File

@@ -103,6 +103,10 @@ build-launcher: ## Build the launcher application
build-all: build build-launcher ## Build both server and launcher
build-dev: ## Run LocalAI in dev mode with live reload
@command -v air >/dev/null 2>&1 || go install github.com/air-verse/air@latest
air -c .air.toml
dev-dist:
$(GORELEASER) build --snapshot --clean

View File

@@ -43,7 +43,7 @@
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white)](https://t.me/localaiofficial_bot)
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
@@ -116,6 +116,8 @@ For more installation options, see [Installer Options](https://localai.io/docs/a
<img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
</a>
> Note: the DMGs are not signed by Apple as quarantined. See https://github.com/mudler/LocalAI/issues/6268 for a workaround, fix is tracked here: https://github.com/mudler/LocalAI/issues/6244
Or run with docker:
> **💡 Docker Run vs Docker Start**
@@ -200,7 +202,7 @@ local-ai run oci://localai/phi-2:latest
> ⚡ **Automatic Backend Detection**: When you install models from the gallery or YAML files, LocalAI automatically detects your system's GPU capabilities (NVIDIA, AMD, Intel) and downloads the appropriate backend. For advanced configuration options, see [GPU Acceleration](https://localai.io/features/gpu-acceleration/#automatic-backend-detection).
For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html)
For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html), if you are interested in our roadmap items and future enhancements, you can see the [Issues labeled as Roadmap here](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
## 📰 Latest project news

View File

@@ -154,6 +154,8 @@ message PredictOptions {
repeated string Videos = 45;
repeated string Audios = 46;
string CorrelationId = 47;
string Tools = 48; // JSON array of available tools/functions for tool calling
string ToolChoice = 49; // JSON string or object specifying tool choice behavior
}
// The response message containing the result
@@ -382,6 +384,11 @@ message StatusResponse {
message Message {
string role = 1;
string content = 2;
// Optional fields for OpenAI-compatible message format
string name = 3; // Tool name (for tool messages)
string tool_call_id = 4; // Tool call ID (for tool messages)
string reasoning_content = 5; // Reasoning content (for thinking models)
string tool_calls = 6; // Tool calls as JSON string (for assistant messages with tool calls)
}
message DetectOptions {

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=5a4ff43e7dd049e35942bc3d12361dab2f155544
LLAMA_VERSION?=92bb442ad999a0d52df0af2730cd861012e8ac5c
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=

View File

File diff suppressed because it is too large Load Diff

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=f16c12f3f55f5bd3d6ac8cf2f31ab90a42c884d5
WHISPER_CPP_VERSION?=a1867e0dad0b21b35afa43fc815dae60c9a139d6
SO_TARGET?=libgowhisper.so
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

View File

@@ -2,6 +2,7 @@
accelerate
torch
torchaudio
numpy>=1.24.0,<1.26.0
transformers
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster

View File

@@ -2,6 +2,7 @@
torch==2.6.0+cu118
torchaudio==2.6.0+cu118
transformers==4.46.3
numpy>=1.24.0,<1.26.0
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
accelerate

View File

@@ -1,6 +1,7 @@
torch
torchaudio
transformers
numpy>=1.24.0,<1.26.0
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
accelerate

View File

@@ -2,6 +2,7 @@
torch==2.6.0+rocm6.1
torchaudio==2.6.0+rocm6.1
transformers
numpy>=1.24.0,<1.26.0
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
accelerate

View File

@@ -3,6 +3,7 @@ intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
torchaudio==2.3.1+cxx11.abi
transformers
numpy>=1.24.0,<1.26.0
# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
accelerate

View File

@@ -2,5 +2,6 @@
torch
torchaudio
transformers
numpy>=1.24.0,<1.26.0
chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
accelerate

View File

@@ -61,7 +61,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.PipelineType != "": # Reuse the PipelineType field for language
kwargs['lang'] = request.PipelineType
self.model_name = model_name
self.model = Reranker(model_name, **kwargs)
self.model = Reranker(model_name, **kwargs)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
@@ -75,12 +75,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
documents.append(doc)
ranked_results=self.model.rank(query=request.query, docs=documents, doc_ids=list(range(len(request.documents))))
# Prepare results to return
cropped_results = ranked_results.top_k(request.top_n) if request.top_n > 0 else ranked_results
results = [
backend_pb2.DocumentResult(
index=res.doc_id,
text=res.text,
relevance_score=res.score
) for res in ranked_results.results
) for res in (cropped_results)
]
# Calculate the usage and total tokens

View File

@@ -76,7 +76,7 @@ class TestBackendServicer(unittest.TestCase):
)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
self.assertTrue(response.success)
rerank_response = stub.Rerank(request)
print(rerank_response.results[0])
self.assertIsNotNone(rerank_response.results)
@@ -87,4 +87,60 @@ class TestBackendServicer(unittest.TestCase):
print(err)
self.fail("Reranker service failed")
finally:
self.tearDown()
self.tearDown()
def test_rerank_omit_top_n(self):
"""
This method tests if the embeddings are generated successfully even top_n is omitted
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
request = backend_pb2.RerankRequest(
query="I love you",
documents=["I hate you", "I really like you"],
top_n=0 #
)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
self.assertTrue(response.success)
rerank_response = stub.Rerank(request)
print(rerank_response.results[0])
self.assertIsNotNone(rerank_response.results)
self.assertEqual(len(rerank_response.results), 2)
self.assertEqual(rerank_response.results[0].text, "I really like you")
self.assertEqual(rerank_response.results[1].text, "I hate you")
except Exception as err:
print(err)
self.fail("Reranker service failed")
finally:
self.tearDown()
def test_rerank_crop(self):
"""
This method tests top_n cropping
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
request = backend_pb2.RerankRequest(
query="I love you",
documents=["I hate you", "I really like you", "I hate ignoring top_n"],
top_n=2
)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
self.assertTrue(response.success)
rerank_response = stub.Rerank(request)
print(rerank_response.results[0])
self.assertIsNotNone(rerank_response.results)
self.assertEqual(len(rerank_response.results), 2)
self.assertEqual(rerank_response.results[0].text, "I really like you")
self.assertEqual(rerank_response.results[1].text, "I hate you")
except Exception as err:
print(err)
self.fail("Reranker service failed")
finally:
self.tearDown()

View File

@@ -22,9 +22,15 @@ func New(opts ...config.AppOption) (*Application, error) {
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.SystemState.Model.ModelsPath)
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
if err := application.start(); err != nil {
return nil, err
}
caps, err := xsysinfo.CPUCapabilities()
if err == nil {
log.Debug().Msgf("CPU capabilities: %v", caps)
}
gpus, err := xsysinfo.GPUs()
if err == nil {
@@ -56,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}
if err := coreStartup.InstallModels(options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
if err := coreStartup.InstallModels(application.GalleryService(), options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
log.Error().Err(err).Msg("error installing models")
}
@@ -152,10 +158,6 @@ func New(opts ...config.AppOption) (*Application, error) {
// Watch the configuration directory
startWatcher(options)
if err := application.start(); err != nil {
return nil, err
}
log.Info().Msg("core/startup process completed!")
return application, nil
}

View File

@@ -2,8 +2,6 @@ package backend
import (
"context"
"encoding/json"
"fmt"
"regexp"
"slices"
"strings"
@@ -35,7 +33,7 @@ type TokenUsage struct {
TimingTokenGeneration float64
}
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
func ModelInference(ctx context.Context, s string, messages schema.Messages, images, videos, audios []string, loader *model.ModelLoader, c *config.ModelConfig, cl *config.ModelConfigLoader, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool, tools string, toolChoice string) (func() (LLMResponse, error), error) {
modelFile := c.Model
// Check if the modelFile exists, if it doesn't try to load it from the gallery
@@ -65,29 +63,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
var protoMessages []*proto.Message
// if we are using the tokenizer template, we need to convert the messages to proto messages
// unless the prompt has already been tokenized (non-chat endpoints + functions)
if c.TemplateConfig.UseTokenizerTemplate && s == "" {
protoMessages = make([]*proto.Message, len(messages), len(messages))
for i, message := range messages {
protoMessages[i] = &proto.Message{
Role: message.Role,
}
switch ct := message.Content.(type) {
case string:
protoMessages[i].Content = ct
case []interface{}:
// If using the tokenizer template, in case of multimodal we want to keep the multimodal content as and return only strings here
data, _ := json.Marshal(ct)
resultData := []struct {
Text string `json:"text"`
}{}
json.Unmarshal(data, &resultData)
for _, r := range resultData {
protoMessages[i].Content += r.Text
}
default:
return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
}
}
if c.TemplateConfig.UseTokenizerTemplate && len(messages) > 0 {
protoMessages = messages.ToProto()
}
// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
@@ -99,6 +76,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
opts.Images = images
opts.Videos = videos
opts.Audios = audios
opts.Tools = tools
opts.ToolChoice = toolChoice
tokenUsage := TokenUsage{}

View File

@@ -1,12 +1,14 @@
package cli
import (
"context"
"encoding/json"
"errors"
"fmt"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/startup"
@@ -78,6 +80,12 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
return err
}
galleryService := services.NewGalleryService(&config.ApplicationConfig{}, model.NewModelLoader(systemState, true))
err = galleryService.Start(context.Background(), config.NewModelConfigLoader(mi.ModelsPath), systemState)
if err != nil {
return err
}
var galleries []config.Gallery
if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil {
log.Error().Err(err).Msg("unable to load galleries")
@@ -127,7 +135,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
}
modelLoader := model.NewModelLoader(systemState, true)
err = startup.InstallModels(galleries, backendGalleries, systemState, modelLoader, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName)
err = startup.InstallModels(galleryService, galleries, backendGalleries, systemState, modelLoader, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName)
if err != nil {
return err
}

View File

@@ -1,151 +1,17 @@
package config
import (
"strings"
"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/rs/zerolog/log"
gguf "github.com/gpustack/gguf-parser-go"
)
type familyType uint8
const (
Unknown familyType = iota
LLaMa3
CommandR
Phi3
ChatML
Mistral03
Gemma
DeepSeek2
)
const (
defaultContextSize = 1024
defaultNGPULayers = 99999999
)
type settingsConfig struct {
StopWords []string
TemplateConfig TemplateConfig
RepeatPenalty float64
}
// default settings to adopt with a given model family
var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
Gemma: {
RepeatPenalty: 1.0,
StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
TemplateConfig: TemplateConfig{
Chat: "{{.Input }}\n<start_of_turn>model\n",
ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
Completion: "{{.Input}}",
},
},
DeepSeek2: {
StopWords: []string{"<end▁of▁sentence>"},
TemplateConfig: TemplateConfig{
ChatMessage: `{{if eq .RoleName "user" -}}User: {{.Content }}
{{ end -}}
{{if eq .RoleName "assistant" -}}Assistant: {{.Content}}<end▁of▁sentence>{{end}}
{{if eq .RoleName "system" -}}{{.Content}}
{{end -}}`,
Chat: "{{.Input -}}\nAssistant: ",
},
},
LLaMa3: {
StopWords: []string{"<|eot_id|>"},
TemplateConfig: TemplateConfig{
Chat: "<|begin_of_text|>{{.Input }}\n<|start_header_id|>assistant<|end_header_id|>",
ChatMessage: "<|start_header_id|>{{ .RoleName }}<|end_header_id|>\n\n{{.Content }}<|eot_id|>",
},
},
CommandR: {
TemplateConfig: TemplateConfig{
Chat: "{{.Input -}}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
Functions: `<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>
You are a function calling AI model, you can call the following functions:
## Available Tools
{{range .Functions}}
- {"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}
{{end}}
When using a tool, reply with JSON, for instance {"name": "tool_name", "arguments": {"param1": "value1", "param2": "value2"}}
<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Input -}}`,
ChatMessage: `{{if eq .RoleName "user" -}}
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if eq .RoleName "system" -}}
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if eq .RoleName "assistant" -}}
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if eq .RoleName "tool" -}}
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if .FunctionCall -}}
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{toJson .FunctionCall}}}<|END_OF_TURN_TOKEN|>
{{- end -}}`,
},
StopWords: []string{"<|END_OF_TURN_TOKEN|>"},
},
Phi3: {
TemplateConfig: TemplateConfig{
Chat: "{{.Input}}\n<|assistant|>",
ChatMessage: "<|{{ .RoleName }}|>\n{{.Content}}<|end|>",
Completion: "{{.Input}}",
},
StopWords: []string{"<|end|>", "<|endoftext|>"},
},
ChatML: {
TemplateConfig: TemplateConfig{
Chat: "{{.Input -}}\n<|im_start|>assistant",
Functions: `<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant`,
ChatMessage: `<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>`,
},
StopWords: []string{"<|im_end|>", "<dummy32000>", "</s>"},
},
Mistral03: {
TemplateConfig: TemplateConfig{
Chat: "{{.Input -}}",
Functions: `[AVAILABLE_TOOLS] [{{range .Functions}}{"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}{{end}} ] [/AVAILABLE_TOOLS]{{.Input }}`,
ChatMessage: `{{if eq .RoleName "user" -}}
[INST] {{.Content }} [/INST]
{{- else if .FunctionCall -}}
[TOOL_CALLS] {{toJson .FunctionCall}} [/TOOL_CALLS]
{{- else if eq .RoleName "tool" -}}
[TOOL_RESULTS] {{.Content}} [/TOOL_RESULTS]
{{- else -}}
{{ .Content -}}
{{ end -}}`,
},
StopWords: []string{"<|im_end|>", "<dummy32000>", "</tool_call>", "<|eot_id|>", "<|end_of_text|>", "</s>", "[/TOOL_CALLS]", "[/ACTIONS]"},
},
}
// this maps well known template used in HF to model families defined above
var knownTemplates = map[string]familyType{
`{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}`: ChatML,
`{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`: Mistral03,
}
func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) {
if defaultCtx == 0 && cfg.ContextSize == nil {
@@ -216,81 +82,9 @@ func guessGGUFFromFile(cfg *ModelConfig, f *gguf.GGUFFile, defaultCtx int) {
cfg.Name = f.Metadata().Name
}
family := identifyFamily(f)
if family == Unknown {
log.Debug().Msgf("guessDefaultsFromFile: %s", "family not identified")
return
}
// identify template
settings, ok := defaultsSettings[family]
if ok {
cfg.TemplateConfig = settings.TemplateConfig
log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: guessed template %+v", cfg.TemplateConfig)
if len(cfg.StopWords) == 0 {
cfg.StopWords = settings.StopWords
}
if cfg.RepeatPenalty == 0.0 {
cfg.RepeatPenalty = settings.RepeatPenalty
}
} else {
log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
}
if cfg.HasTemplate() {
return
}
// identify from well known templates first, otherwise use the raw jinja template
chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
if found {
// try to use the jinja template
cfg.TemplateConfig.JinjaTemplate = true
cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
}
}
func identifyFamily(f *gguf.GGUFFile) familyType {
// identify from well known templates first
chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
if found && chatTemplate.ValueString() != "" {
if family, ok := knownTemplates[chatTemplate.ValueString()]; ok {
return family
}
}
// otherwise try to identify from the model properties
arch := f.Architecture().Architecture
eosTokenID := f.Tokenizer().EOSTokenID
bosTokenID := f.Tokenizer().BOSTokenID
isYI := arch == "llama" && bosTokenID == 1 && eosTokenID == 2
// WTF! Mistral0.3 and isYi have same bosTokenID and eosTokenID
llama3 := arch == "llama" && eosTokenID == 128009
commandR := arch == "command-r" && eosTokenID == 255001
qwen2 := arch == "qwen2"
phi3 := arch == "phi-3"
gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Metadata().Name), "gemma")
deepseek2 := arch == "deepseek2"
switch {
case deepseek2:
return DeepSeek2
case gemma:
return Gemma
case llama3:
return LLaMa3
case commandR:
return CommandR
case phi3:
return Phi3
case qwen2, isYI:
return ChatML
default:
return Unknown
}
// Instruct to use template from llama.cpp
cfg.TemplateConfig.UseTokenizerTemplate = true
cfg.FunctionsConfig.GrammarConfig.NoGrammar = true
cfg.Options = append(cfg.Options, "use_jinja:true")
cfg.KnownUsecaseStrings = append(cfg.KnownUsecaseStrings, "FLAG_CHAT")
}

View File

@@ -16,30 +16,31 @@ const (
RAND_SEED = -1
)
// @Description TTS configuration
type TTSConfig struct {
// Voice wav path or id
Voice string `yaml:"voice" json:"voice"`
Voice string `yaml:"voice,omitempty" json:"voice,omitempty"`
AudioPath string `yaml:"audio_path" json:"audio_path"`
AudioPath string `yaml:"audio_path,omitempty" json:"audio_path,omitempty"`
}
// ModelConfig represents a model configuration
// @Description ModelConfig represents a model configuration
type ModelConfig struct {
modelConfigFile string `yaml:"-" json:"-"`
schema.PredictionOptions `yaml:"parameters" json:"parameters"`
Name string `yaml:"name" json:"name"`
schema.PredictionOptions `yaml:"parameters,omitempty" json:"parameters,omitempty"`
Name string `yaml:"name,omitempty" json:"name,omitempty"`
F16 *bool `yaml:"f16" json:"f16"`
Threads *int `yaml:"threads" json:"threads"`
Debug *bool `yaml:"debug" json:"debug"`
Roles map[string]string `yaml:"roles" json:"roles"`
Embeddings *bool `yaml:"embeddings" json:"embeddings"`
Backend string `yaml:"backend" json:"backend"`
TemplateConfig TemplateConfig `yaml:"template" json:"template"`
KnownUsecaseStrings []string `yaml:"known_usecases" json:"known_usecases"`
F16 *bool `yaml:"f16,omitempty" json:"f16,omitempty"`
Threads *int `yaml:"threads,omitempty" json:"threads,omitempty"`
Debug *bool `yaml:"debug,omitempty" json:"debug,omitempty"`
Roles map[string]string `yaml:"roles,omitempty" json:"roles,omitempty"`
Embeddings *bool `yaml:"embeddings,omitempty" json:"embeddings,omitempty"`
Backend string `yaml:"backend,omitempty" json:"backend,omitempty"`
TemplateConfig TemplateConfig `yaml:"template,omitempty" json:"template,omitempty"`
KnownUsecaseStrings []string `yaml:"known_usecases,omitempty" json:"known_usecases,omitempty"`
KnownUsecases *ModelConfigUsecases `yaml:"-" json:"-"`
Pipeline Pipeline `yaml:"pipeline" json:"pipeline"`
Pipeline Pipeline `yaml:"pipeline,omitempty" json:"pipeline,omitempty"`
PromptStrings, InputStrings []string `yaml:"-" json:"-"`
InputToken [][]int `yaml:"-" json:"-"`
@@ -47,96 +48,101 @@ type ModelConfig struct {
ResponseFormat string `yaml:"-" json:"-"`
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`
FunctionsConfig functions.FunctionsConfig `yaml:"function" json:"function"`
FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
FeatureFlag FeatureFlag `yaml:"feature_flags" json:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
// LLM configs (GPT4ALL, Llama.cpp, ...)
LLMConfig `yaml:",inline" json:",inline"`
// Diffusers
Diffusers Diffusers `yaml:"diffusers" json:"diffusers"`
Step int `yaml:"step" json:"step"`
Diffusers Diffusers `yaml:"diffusers,omitempty" json:"diffusers,omitempty"`
Step int `yaml:"step,omitempty" json:"step,omitempty"`
// GRPC Options
GRPC GRPC `yaml:"grpc" json:"grpc"`
GRPC GRPC `yaml:"grpc,omitempty" json:"grpc,omitempty"`
// TTS specifics
TTSConfig `yaml:"tts" json:"tts"`
TTSConfig `yaml:"tts,omitempty" json:"tts,omitempty"`
// CUDA
// Explicitly enable CUDA or not (some backends might need it)
CUDA bool `yaml:"cuda" json:"cuda"`
CUDA bool `yaml:"cuda,omitempty" json:"cuda,omitempty"`
DownloadFiles []File `yaml:"download_files" json:"download_files"`
DownloadFiles []File `yaml:"download_files,omitempty" json:"download_files,omitempty"`
Description string `yaml:"description" json:"description"`
Usage string `yaml:"usage" json:"usage"`
Description string `yaml:"description,omitempty" json:"description,omitempty"`
Usage string `yaml:"usage,omitempty" json:"usage,omitempty"`
Options []string `yaml:"options" json:"options"`
Overrides []string `yaml:"overrides" json:"overrides"`
Options []string `yaml:"options,omitempty" json:"options,omitempty"`
Overrides []string `yaml:"overrides,omitempty" json:"overrides,omitempty"`
MCP MCPConfig `yaml:"mcp" json:"mcp"`
Agent AgentConfig `yaml:"agent" json:"agent"`
MCP MCPConfig `yaml:"mcp,omitempty" json:"mcp,omitempty"`
Agent AgentConfig `yaml:"agent,omitempty" json:"agent,omitempty"`
}
// @Description MCP configuration
type MCPConfig struct {
Servers string `yaml:"remote" json:"remote"`
Stdio string `yaml:"stdio" json:"stdio"`
Servers string `yaml:"remote,omitempty" json:"remote,omitempty"`
Stdio string `yaml:"stdio,omitempty" json:"stdio,omitempty"`
}
// @Description Agent configuration
type AgentConfig struct {
MaxAttempts int `yaml:"max_attempts" json:"max_attempts"`
MaxIterations int `yaml:"max_iterations" json:"max_iterations"`
EnableReasoning bool `yaml:"enable_reasoning" json:"enable_reasoning"`
EnablePlanning bool `yaml:"enable_planning" json:"enable_planning"`
EnableMCPPrompts bool `yaml:"enable_mcp_prompts" json:"enable_mcp_prompts"`
EnablePlanReEvaluator bool `yaml:"enable_plan_re_evaluator" json:"enable_plan_re_evaluator"`
MaxAttempts int `yaml:"max_attempts,omitempty" json:"max_attempts,omitempty"`
MaxIterations int `yaml:"max_iterations,omitempty" json:"max_iterations,omitempty"`
EnableReasoning bool `yaml:"enable_reasoning,omitempty" json:"enable_reasoning,omitempty"`
EnablePlanning bool `yaml:"enable_planning,omitempty" json:"enable_planning,omitempty"`
EnableMCPPrompts bool `yaml:"enable_mcp_prompts,omitempty" json:"enable_mcp_prompts,omitempty"`
EnablePlanReEvaluator bool `yaml:"enable_plan_re_evaluator,omitempty" json:"enable_plan_re_evaluator,omitempty"`
}
func (c *MCPConfig) MCPConfigFromYAML() (MCPGenericConfig[MCPRemoteServers], MCPGenericConfig[MCPSTDIOServers]) {
func (c *MCPConfig) MCPConfigFromYAML() (MCPGenericConfig[MCPRemoteServers], MCPGenericConfig[MCPSTDIOServers], error) {
var remote MCPGenericConfig[MCPRemoteServers]
var stdio MCPGenericConfig[MCPSTDIOServers]
if err := yaml.Unmarshal([]byte(c.Servers), &remote); err != nil {
return remote, stdio
return remote, stdio, err
}
if err := yaml.Unmarshal([]byte(c.Stdio), &stdio); err != nil {
return remote, stdio
return remote, stdio, err
}
return remote, stdio
return remote, stdio, nil
}
// @Description MCP generic configuration
type MCPGenericConfig[T any] struct {
Servers T `yaml:"mcpServers" json:"mcpServers"`
Servers T `yaml:"mcpServers,omitempty" json:"mcpServers,omitempty"`
}
type MCPRemoteServers map[string]MCPRemoteServer
type MCPSTDIOServers map[string]MCPSTDIOServer
// @Description MCP remote server configuration
type MCPRemoteServer struct {
URL string `json:"url"`
Token string `json:"token"`
URL string `json:"url,omitempty"`
Token string `json:"token,omitempty"`
}
// @Description MCP STDIO server configuration
type MCPSTDIOServer struct {
Args []string `json:"args"`
Env map[string]string `json:"env"`
Command string `json:"command"`
Args []string `json:"args,omitempty"`
Env map[string]string `json:"env,omitempty"`
Command string `json:"command,omitempty"`
}
// Pipeline defines other models to use for audio-to-audio
// @Description Pipeline defines other models to use for audio-to-audio
type Pipeline struct {
TTS string `yaml:"tts" json:"tts"`
LLM string `yaml:"llm" json:"llm"`
Transcription string `yaml:"transcription" json:"transcription"`
VAD string `yaml:"vad" json:"vad"`
TTS string `yaml:"tts,omitempty" json:"tts,omitempty"`
LLM string `yaml:"llm,omitempty" json:"llm,omitempty"`
Transcription string `yaml:"transcription,omitempty" json:"transcription,omitempty"`
VAD string `yaml:"vad,omitempty" json:"vad,omitempty"`
}
// @Description File configuration for model downloads
type File struct {
Filename string `yaml:"filename" json:"filename"`
SHA256 string `yaml:"sha256" json:"sha256"`
URI downloader.URI `yaml:"uri" json:"uri"`
Filename string `yaml:"filename,omitempty" json:"filename,omitempty"`
SHA256 string `yaml:"sha256,omitempty" json:"sha256,omitempty"`
URI downloader.URI `yaml:"uri,omitempty" json:"uri,omitempty"`
}
type FeatureFlag map[string]*bool
@@ -148,126 +154,136 @@ func (ff FeatureFlag) Enabled(s string) bool {
return false
}
// @Description GRPC configuration
type GRPC struct {
Attempts int `yaml:"attempts" json:"attempts"`
AttemptsSleepTime int `yaml:"attempts_sleep_time" json:"attempts_sleep_time"`
Attempts int `yaml:"attempts,omitempty" json:"attempts,omitempty"`
AttemptsSleepTime int `yaml:"attempts_sleep_time,omitempty" json:"attempts_sleep_time,omitempty"`
}
// @Description Diffusers configuration
type Diffusers struct {
CUDA bool `yaml:"cuda" json:"cuda"`
PipelineType string `yaml:"pipeline_type" json:"pipeline_type"`
SchedulerType string `yaml:"scheduler_type" json:"scheduler_type"`
EnableParameters string `yaml:"enable_parameters" json:"enable_parameters"` // A list of comma separated parameters to specify
IMG2IMG bool `yaml:"img2img" json:"img2img"` // Image to Image Diffuser
ClipSkip int `yaml:"clip_skip" json:"clip_skip"` // Skip every N frames
ClipModel string `yaml:"clip_model" json:"clip_model"` // Clip model to use
ClipSubFolder string `yaml:"clip_subfolder" json:"clip_subfolder"` // Subfolder to use for clip model
ControlNet string `yaml:"control_net" json:"control_net"`
CUDA bool `yaml:"cuda,omitempty" json:"cuda,omitempty"`
PipelineType string `yaml:"pipeline_type,omitempty" json:"pipeline_type,omitempty"`
SchedulerType string `yaml:"scheduler_type,omitempty" json:"scheduler_type,omitempty"`
EnableParameters string `yaml:"enable_parameters,omitempty" json:"enable_parameters,omitempty"` // A list of comma separated parameters to specify
IMG2IMG bool `yaml:"img2img,omitempty" json:"img2img,omitempty"` // Image to Image Diffuser
ClipSkip int `yaml:"clip_skip,omitempty" json:"clip_skip,omitempty"` // Skip every N frames
ClipModel string `yaml:"clip_model,omitempty" json:"clip_model,omitempty"` // Clip model to use
ClipSubFolder string `yaml:"clip_subfolder,omitempty" json:"clip_subfolder,omitempty"` // Subfolder to use for clip model
ControlNet string `yaml:"control_net,omitempty" json:"control_net,omitempty"`
}
// LLMConfig is a struct that holds the configuration that are
// generic for most of the LLM backends.
// @Description LLMConfig is a struct that holds the configuration that are generic for most of the LLM backends.
type LLMConfig struct {
SystemPrompt string `yaml:"system_prompt" json:"system_prompt"`
TensorSplit string `yaml:"tensor_split" json:"tensor_split"`
MainGPU string `yaml:"main_gpu" json:"main_gpu"`
RMSNormEps float32 `yaml:"rms_norm_eps" json:"rms_norm_eps"`
NGQA int32 `yaml:"ngqa" json:"ngqa"`
PromptCachePath string `yaml:"prompt_cache_path" json:"prompt_cache_path"`
PromptCacheAll bool `yaml:"prompt_cache_all" json:"prompt_cache_all"`
PromptCacheRO bool `yaml:"prompt_cache_ro" json:"prompt_cache_ro"`
MirostatETA *float64 `yaml:"mirostat_eta" json:"mirostat_eta"`
MirostatTAU *float64 `yaml:"mirostat_tau" json:"mirostat_tau"`
Mirostat *int `yaml:"mirostat" json:"mirostat"`
NGPULayers *int `yaml:"gpu_layers" json:"gpu_layers"`
MMap *bool `yaml:"mmap" json:"mmap"`
MMlock *bool `yaml:"mmlock" json:"mmlock"`
LowVRAM *bool `yaml:"low_vram" json:"low_vram"`
Reranking *bool `yaml:"reranking" json:"reranking"`
Grammar string `yaml:"grammar" json:"grammar"`
StopWords []string `yaml:"stopwords" json:"stopwords"`
Cutstrings []string `yaml:"cutstrings" json:"cutstrings"`
ExtractRegex []string `yaml:"extract_regex" json:"extract_regex"`
TrimSpace []string `yaml:"trimspace" json:"trimspace"`
TrimSuffix []string `yaml:"trimsuffix" json:"trimsuffix"`
SystemPrompt string `yaml:"system_prompt,omitempty" json:"system_prompt,omitempty"`
TensorSplit string `yaml:"tensor_split,omitempty" json:"tensor_split,omitempty"`
MainGPU string `yaml:"main_gpu,omitempty" json:"main_gpu,omitempty"`
RMSNormEps float32 `yaml:"rms_norm_eps,omitempty" json:"rms_norm_eps,omitempty"`
NGQA int32 `yaml:"ngqa,omitempty" json:"ngqa,omitempty"`
PromptCachePath string `yaml:"prompt_cache_path,omitempty" json:"prompt_cache_path,omitempty"`
PromptCacheAll bool `yaml:"prompt_cache_all,omitempty" json:"prompt_cache_all,omitempty"`
PromptCacheRO bool `yaml:"prompt_cache_ro,omitempty" json:"prompt_cache_ro,omitempty"`
MirostatETA *float64 `yaml:"mirostat_eta,omitempty" json:"mirostat_eta,omitempty"`
MirostatTAU *float64 `yaml:"mirostat_tau,omitempty" json:"mirostat_tau,omitempty"`
Mirostat *int `yaml:"mirostat,omitempty" json:"mirostat,omitempty"`
NGPULayers *int `yaml:"gpu_layers,omitempty" json:"gpu_layers,omitempty"`
MMap *bool `yaml:"mmap,omitempty" json:"mmap,omitempty"`
MMlock *bool `yaml:"mmlock,omitempty" json:"mmlock,omitempty"`
LowVRAM *bool `yaml:"low_vram,omitempty" json:"low_vram,omitempty"`
Reranking *bool `yaml:"reranking,omitempty" json:"reranking,omitempty"`
Grammar string `yaml:"grammar,omitempty" json:"grammar,omitempty"`
StopWords []string `yaml:"stopwords,omitempty" json:"stopwords,omitempty"`
Cutstrings []string `yaml:"cutstrings,omitempty" json:"cutstrings,omitempty"`
ExtractRegex []string `yaml:"extract_regex,omitempty" json:"extract_regex,omitempty"`
TrimSpace []string `yaml:"trimspace,omitempty" json:"trimspace,omitempty"`
TrimSuffix []string `yaml:"trimsuffix,omitempty" json:"trimsuffix,omitempty"`
ContextSize *int `yaml:"context_size" json:"context_size"`
NUMA bool `yaml:"numa" json:"numa"`
LoraAdapter string `yaml:"lora_adapter" json:"lora_adapter"`
LoraBase string `yaml:"lora_base" json:"lora_base"`
LoraAdapters []string `yaml:"lora_adapters" json:"lora_adapters"`
LoraScales []float32 `yaml:"lora_scales" json:"lora_scales"`
LoraScale float32 `yaml:"lora_scale" json:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq" json:"no_mulmatq"`
DraftModel string `yaml:"draft_model" json:"draft_model"`
NDraft int32 `yaml:"n_draft" json:"n_draft"`
Quantization string `yaml:"quantization" json:"quantization"`
LoadFormat string `yaml:"load_format" json:"load_format"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization" json:"gpu_memory_utilization"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code" json:"trust_remote_code"` // vLLM
EnforceEager bool `yaml:"enforce_eager" json:"enforce_eager"` // vLLM
SwapSpace int `yaml:"swap_space" json:"swap_space"` // vLLM
MaxModelLen int `yaml:"max_model_len" json:"max_model_len"` // vLLM
TensorParallelSize int `yaml:"tensor_parallel_size" json:"tensor_parallel_size"` // vLLM
DisableLogStatus bool `yaml:"disable_log_stats" json:"disable_log_stats"` // vLLM
DType string `yaml:"dtype" json:"dtype"` // vLLM
LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt" json:"limit_mm_per_prompt"` // vLLM
MMProj string `yaml:"mmproj" json:"mmproj"`
ContextSize *int `yaml:"context_size,omitempty" json:"context_size,omitempty"`
NUMA bool `yaml:"numa,omitempty" json:"numa,omitempty"`
LoraAdapter string `yaml:"lora_adapter,omitempty" json:"lora_adapter,omitempty"`
LoraBase string `yaml:"lora_base,omitempty" json:"lora_base,omitempty"`
LoraAdapters []string `yaml:"lora_adapters,omitempty" json:"lora_adapters,omitempty"`
LoraScales []float32 `yaml:"lora_scales,omitempty" json:"lora_scales,omitempty"`
LoraScale float32 `yaml:"lora_scale,omitempty" json:"lora_scale,omitempty"`
NoMulMatQ bool `yaml:"no_mulmatq,omitempty" json:"no_mulmatq,omitempty"`
DraftModel string `yaml:"draft_model,omitempty" json:"draft_model,omitempty"`
NDraft int32 `yaml:"n_draft,omitempty" json:"n_draft,omitempty"`
Quantization string `yaml:"quantization,omitempty" json:"quantization,omitempty"`
LoadFormat string `yaml:"load_format,omitempty" json:"load_format,omitempty"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization,omitempty" json:"gpu_memory_utilization,omitempty"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code,omitempty" json:"trust_remote_code,omitempty"` // vLLM
EnforceEager bool `yaml:"enforce_eager,omitempty" json:"enforce_eager,omitempty"` // vLLM
SwapSpace int `yaml:"swap_space,omitempty" json:"swap_space,omitempty"` // vLLM
MaxModelLen int `yaml:"max_model_len,omitempty" json:"max_model_len,omitempty"` // vLLM
TensorParallelSize int `yaml:"tensor_parallel_size,omitempty" json:"tensor_parallel_size,omitempty"` // vLLM
DisableLogStatus bool `yaml:"disable_log_stats,omitempty" json:"disable_log_stats,omitempty"` // vLLM
DType string `yaml:"dtype,omitempty" json:"dtype,omitempty"` // vLLM
LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt,omitempty" json:"limit_mm_per_prompt,omitempty"` // vLLM
MMProj string `yaml:"mmproj,omitempty" json:"mmproj,omitempty"`
FlashAttention *string `yaml:"flash_attention" json:"flash_attention"`
NoKVOffloading bool `yaml:"no_kv_offloading" json:"no_kv_offloading"`
CacheTypeK string `yaml:"cache_type_k" json:"cache_type_k"`
CacheTypeV string `yaml:"cache_type_v" json:"cache_type_v"`
FlashAttention *string `yaml:"flash_attention,omitempty" json:"flash_attention,omitempty"`
NoKVOffloading bool `yaml:"no_kv_offloading,omitempty" json:"no_kv_offloading,omitempty"`
CacheTypeK string `yaml:"cache_type_k,omitempty" json:"cache_type_k,omitempty"`
CacheTypeV string `yaml:"cache_type_v,omitempty" json:"cache_type_v,omitempty"`
RopeScaling string `yaml:"rope_scaling" json:"rope_scaling"`
ModelType string `yaml:"type" json:"type"`
RopeScaling string `yaml:"rope_scaling,omitempty" json:"rope_scaling,omitempty"`
ModelType string `yaml:"type,omitempty" json:"type,omitempty"`
YarnExtFactor float32 `yaml:"yarn_ext_factor" json:"yarn_ext_factor"`
YarnAttnFactor float32 `yaml:"yarn_attn_factor" json:"yarn_attn_factor"`
YarnBetaFast float32 `yaml:"yarn_beta_fast" json:"yarn_beta_fast"`
YarnBetaSlow float32 `yaml:"yarn_beta_slow" json:"yarn_beta_slow"`
YarnExtFactor float32 `yaml:"yarn_ext_factor,omitempty" json:"yarn_ext_factor,omitempty"`
YarnAttnFactor float32 `yaml:"yarn_attn_factor,omitempty" json:"yarn_attn_factor,omitempty"`
YarnBetaFast float32 `yaml:"yarn_beta_fast,omitempty" json:"yarn_beta_fast,omitempty"`
YarnBetaSlow float32 `yaml:"yarn_beta_slow,omitempty" json:"yarn_beta_slow,omitempty"`
CFGScale float32 `yaml:"cfg_scale" json:"cfg_scale"` // Classifier-Free Guidance Scale
CFGScale float32 `yaml:"cfg_scale,omitempty" json:"cfg_scale,omitempty"` // Classifier-Free Guidance Scale
}
// LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM
// @Description LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM
type LimitMMPerPrompt struct {
LimitImagePerPrompt int `yaml:"image" json:"image"`
LimitVideoPerPrompt int `yaml:"video" json:"video"`
LimitAudioPerPrompt int `yaml:"audio" json:"audio"`
LimitImagePerPrompt int `yaml:"image,omitempty" json:"image,omitempty"`
LimitVideoPerPrompt int `yaml:"video,omitempty" json:"video,omitempty"`
LimitAudioPerPrompt int `yaml:"audio,omitempty" json:"audio,omitempty"`
}
// TemplateConfig is a struct that holds the configuration of the templating system
// @Description TemplateConfig is a struct that holds the configuration of the templating system
type TemplateConfig struct {
// Chat is the template used in the chat completion endpoint
Chat string `yaml:"chat" json:"chat"`
Chat string `yaml:"chat,omitempty" json:"chat,omitempty"`
// ChatMessage is the template used for chat messages
ChatMessage string `yaml:"chat_message" json:"chat_message"`
ChatMessage string `yaml:"chat_message,omitempty" json:"chat_message,omitempty"`
// Completion is the template used for completion requests
Completion string `yaml:"completion" json:"completion"`
Completion string `yaml:"completion,omitempty" json:"completion,omitempty"`
// Edit is the template used for edit completion requests
Edit string `yaml:"edit" json:"edit"`
Edit string `yaml:"edit,omitempty" json:"edit,omitempty"`
// Functions is the template used when tools are present in the client requests
Functions string `yaml:"function" json:"function"`
Functions string `yaml:"function,omitempty" json:"function,omitempty"`
// UseTokenizerTemplate is a flag that indicates if the tokenizer template should be used.
// Note: this is mostly consumed for backends such as vllm and transformers
// that can use the tokenizers specified in the JSON config files of the models
UseTokenizerTemplate bool `yaml:"use_tokenizer_template" json:"use_tokenizer_template"`
UseTokenizerTemplate bool `yaml:"use_tokenizer_template,omitempty" json:"use_tokenizer_template,omitempty"`
// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
// It defaults to \n
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character" json:"join_chat_messages_by_character"`
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character,omitempty" json:"join_chat_messages_by_character,omitempty"`
Multimodal string `yaml:"multimodal" json:"multimodal"`
Multimodal string `yaml:"multimodal,omitempty" json:"multimodal,omitempty"`
JinjaTemplate bool `yaml:"jinja_template" json:"jinja_template"`
ReplyPrefix string `yaml:"reply_prefix,omitempty" json:"reply_prefix,omitempty"`
}
ReplyPrefix string `yaml:"reply_prefix" json:"reply_prefix"`
func (c *ModelConfig) syncKnownUsecasesFromString() {
c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
// Make sure the usecases are valid, we rewrite with what we identified
c.KnownUsecaseStrings = []string{}
for k, usecase := range GetAllModelConfigUsecases() {
if c.HasUsecases(usecase) {
c.KnownUsecaseStrings = append(c.KnownUsecaseStrings, k)
}
}
}
func (c *ModelConfig) UnmarshalYAML(value *yaml.Node) error {
@@ -278,14 +294,7 @@ func (c *ModelConfig) UnmarshalYAML(value *yaml.Node) error {
}
*c = ModelConfig(aux)
c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
// Make sure the usecases are valid, we rewrite with what we identified
c.KnownUsecaseStrings = []string{}
for k, usecase := range GetAllModelConfigUsecases() {
if c.HasUsecases(usecase) {
c.KnownUsecaseStrings = append(c.KnownUsecaseStrings, k)
}
}
c.syncKnownUsecasesFromString()
return nil
}
@@ -462,6 +471,7 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
}
guessDefaultsFromFile(cfg, lo.modelPath, ctx)
cfg.syncKnownUsecasesFromString()
}
func (c *ModelConfig) Validate() bool {
@@ -492,7 +502,7 @@ func (c *ModelConfig) Validate() bool {
}
func (c *ModelConfig) HasTemplate() bool {
return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != ""
return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != "" || c.TemplateConfig.UseTokenizerTemplate
}
func (c *ModelConfig) GetModelConfigFile() string {
@@ -573,7 +583,7 @@ func (c *ModelConfig) HasUsecases(u ModelConfigUsecases) bool {
// This avoids the maintenance burden of updating this list for each new backend - but unfortunately, that's the best option for some services currently.
func (c *ModelConfig) GuessUsecases(u ModelConfigUsecases) bool {
if (u & FLAG_CHAT) == FLAG_CHAT {
if c.TemplateConfig.Chat == "" && c.TemplateConfig.ChatMessage == "" {
if c.TemplateConfig.Chat == "" && c.TemplateConfig.ChatMessage == "" && !c.TemplateConfig.UseTokenizerTemplate {
return false
}
}

View File

@@ -4,6 +4,7 @@ package gallery
import (
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
@@ -310,8 +311,10 @@ func ListSystemBackends(systemState *system.SystemState) (SystemBackends, error)
}
}
}
} else {
} else if !errors.Is(err, os.ErrNotExist) {
log.Warn().Err(err).Msg("Failed to read system backends, proceeding with user-managed backends")
} else if errors.Is(err, os.ErrNotExist) {
log.Debug().Msg("No system backends found")
}
// User-managed backends and alias collection

View File

@@ -61,12 +61,15 @@ func (gm GalleryElements[T]) Search(term string) GalleryElements[T] {
term = strings.ToLower(term)
for _, m := range gm {
if fuzzy.Match(term, strings.ToLower(m.GetName())) ||
fuzzy.Match(term, strings.ToLower(m.GetDescription())) ||
fuzzy.Match(term, strings.ToLower(m.GetGallery().Name)) ||
strings.Contains(strings.ToLower(m.GetName()), term) ||
strings.Contains(strings.ToLower(m.GetDescription()), term) ||
strings.Contains(strings.ToLower(m.GetGallery().Name), term) ||
strings.Contains(strings.ToLower(strings.Join(m.GetTags(), ",")), term) {
filteredModels = append(filteredModels, m)
}
}
return filteredModels
}

View File

@@ -0,0 +1,65 @@
package importers
import (
"encoding/json"
"strings"
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/core/gallery"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
)
var DefaultImporters = []Importer{
&LlamaCPPImporter{},
&MLXImporter{},
}
type Details struct {
HuggingFace *hfapi.ModelDetails
URI string
Preferences json.RawMessage
}
type Importer interface {
Match(details Details) bool
Import(details Details) (gallery.ModelConfig, error)
}
func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.ModelConfig, error) {
var err error
var modelConfig gallery.ModelConfig
hf := hfapi.NewClient()
hfrepoID := strings.ReplaceAll(uri, "huggingface://", "")
hfrepoID = strings.ReplaceAll(hfrepoID, "hf://", "")
hfrepoID = strings.ReplaceAll(hfrepoID, "https://huggingface.co/", "")
hfDetails, err := hf.GetModelDetails(hfrepoID)
if err != nil {
// maybe not a HF repository
// TODO: maybe we can check if the URI is a valid HF repository
log.Debug().Str("uri", uri).Msg("Failed to get model details, maybe not a HF repository")
} else {
log.Debug().Str("uri", uri).Msg("Got model details")
log.Debug().Any("details", hfDetails).Msg("Model details")
}
details := Details{
HuggingFace: hfDetails,
URI: uri,
Preferences: preferences,
}
for _, importer := range DefaultImporters {
if importer.Match(details) {
modelConfig, err = importer.Import(details)
if err != nil {
continue
}
break
}
}
return modelConfig, err
}

View File

@@ -0,0 +1,13 @@
package importers_test
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestImporters(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Importers test suite")
}

View File

@@ -0,0 +1,215 @@
package importers_test
import (
"encoding/json"
"fmt"
"github.com/mudler/LocalAI/core/gallery/importers"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("DiscoverModelConfig", func() {
Context("With only a repository URI", func() {
It("should discover and import using LlamaCPPImporter", func() {
uri := "https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
Expect(modelConfig.Name).To(Equal("LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].Filename).To(Equal("localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/resolve/main/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].SHA256).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4"), fmt.Sprintf("Model config: %+v", modelConfig))
})
It("should discover and import using LlamaCPPImporter", func() {
uri := "https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF"
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
Expect(modelConfig.Name).To(Equal("Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: mmproj/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(len(modelConfig.Files)).To(Equal(2), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].Filename).To(Equal("Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/Qwen3VL-2B-Instruct-Q4_K_M.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[1].Filename).To(Equal("mmproj/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[1].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/mmproj-Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[1].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig))
})
It("should discover and import using LlamaCPPImporter", func() {
uri := "https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF"
preferences := json.RawMessage(`{ "quantizations": "Q8_0", "mmproj_quantizations": "f16" }`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
Expect(modelConfig.Name).To(Equal("Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.ConfigFile).To(ContainSubstring("mmproj: mmproj/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(len(modelConfig.Files)).To(Equal(2), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].Filename).To(Equal("Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/Qwen3VL-2B-Instruct-Q8_0.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[1].Filename).To(Equal("mmproj/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[1].URI).To(Equal("https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct-GGUF/resolve/main/mmproj-Qwen3VL-2B-Instruct-F16.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[1].SHA256).ToNot(BeEmpty(), fmt.Sprintf("Model config: %+v", modelConfig))
})
})
Context("with .gguf URI", func() {
It("should discover and import using LlamaCPPImporter", func() {
uri := "https://example.com/my-model.gguf"
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("my-model.gguf"))
Expect(modelConfig.Description).To(Equal("Imported from https://example.com/my-model.gguf"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
})
It("should use custom preferences when provided", func() {
uri := "https://example.com/my-model.gguf"
preferences := json.RawMessage(`{"name": "custom-name", "description": "Custom description"}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("custom-name"))
Expect(modelConfig.Description).To(Equal("Custom description"))
})
})
Context("with mlx-community URI", func() {
It("should discover and import using MLXImporter", func() {
uri := "https://huggingface.co/mlx-community/test-model"
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("test-model"))
Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mlx-community/test-model"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
})
It("should use custom preferences when provided", func() {
uri := "https://huggingface.co/mlx-community/test-model"
preferences := json.RawMessage(`{"name": "custom-mlx", "description": "Custom MLX description"}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("custom-mlx"))
Expect(modelConfig.Description).To(Equal("Custom MLX description"))
})
})
Context("with backend preference", func() {
It("should use llama-cpp backend when specified", func() {
uri := "https://example.com/model"
preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
})
It("should use mlx backend when specified", func() {
uri := "https://example.com/model"
preferences := json.RawMessage(`{"backend": "mlx"}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
})
It("should use mlx-vlm backend when specified", func() {
uri := "https://example.com/model"
preferences := json.RawMessage(`{"backend": "mlx-vlm"}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx-vlm"))
})
})
Context("with HuggingFace URI formats", func() {
It("should handle huggingface:// prefix", func() {
uri := "huggingface://mlx-community/test-model"
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("test-model"))
})
It("should handle hf:// prefix", func() {
uri := "hf://mlx-community/test-model"
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("test-model"))
})
It("should handle https://huggingface.co/ prefix", func() {
uri := "https://huggingface.co/mlx-community/test-model"
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("test-model"))
})
})
Context("with invalid or non-matching URI", func() {
It("should return error when no importer matches", func() {
uri := "https://example.com/unknown-model.bin"
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
// When no importer matches, the function returns empty config and error
// The exact behavior depends on implementation, but typically an error is returned
Expect(modelConfig.Name).To(BeEmpty())
Expect(err).To(HaveOccurred())
})
})
Context("with invalid JSON preferences", func() {
It("should return error when JSON is invalid even if URI matches", func() {
uri := "https://example.com/model.gguf"
preferences := json.RawMessage(`invalid json`)
// Even though Match() returns true for .gguf extension,
// Import() will fail when trying to unmarshal invalid JSON preferences
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).To(HaveOccurred())
Expect(modelConfig.Name).To(BeEmpty())
})
})
})

View File

@@ -0,0 +1,201 @@
package importers
import (
"encoding/json"
"path/filepath"
"slices"
"strings"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
"go.yaml.in/yaml/v2"
)
var _ Importer = &LlamaCPPImporter{}
type LlamaCPPImporter struct{}
func (i *LlamaCPPImporter) Match(details Details) bool {
preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return false
}
preferencesMap := make(map[string]any)
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return false
}
if preferencesMap["backend"] == "llama-cpp" {
return true
}
if strings.HasSuffix(details.URI, ".gguf") {
return true
}
if details.HuggingFace != nil {
for _, file := range details.HuggingFace.Files {
if strings.HasSuffix(file.Path, ".gguf") {
return true
}
}
}
return false
}
func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) {
preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return gallery.ModelConfig{}, err
}
preferencesMap := make(map[string]any)
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return gallery.ModelConfig{}, err
}
name, ok := preferencesMap["name"].(string)
if !ok {
name = filepath.Base(details.URI)
}
description, ok := preferencesMap["description"].(string)
if !ok {
description = "Imported from " + details.URI
}
preferedQuantizations, _ := preferencesMap["quantizations"].(string)
quants := []string{"q4_k_m"}
if preferedQuantizations != "" {
quants = strings.Split(preferedQuantizations, ",")
}
mmprojQuants, _ := preferencesMap["mmproj_quantizations"].(string)
mmprojQuantsList := []string{"fp16"}
if mmprojQuants != "" {
mmprojQuantsList = strings.Split(mmprojQuants, ",")
}
modelConfig := config.ModelConfig{
Name: name,
Description: description,
KnownUsecaseStrings: []string{"chat"},
Backend: "llama-cpp",
TemplateConfig: config.TemplateConfig{
UseTokenizerTemplate: true,
},
FunctionsConfig: functions.FunctionsConfig{
GrammarConfig: functions.GrammarConfig{
NoGrammar: true,
},
},
}
cfg := gallery.ModelConfig{
Name: name,
Description: description,
}
if strings.HasSuffix(details.URI, ".gguf") {
cfg.Files = append(cfg.Files, gallery.File{
URI: details.URI,
Filename: filepath.Base(details.URI),
})
modelConfig.PredictionOptions = schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{
Model: filepath.Base(details.URI),
},
}
} else if details.HuggingFace != nil {
// We want to:
// Get first the chosen quants that match filenames
// OR the first mmproj/gguf file found
var lastMMProjFile *gallery.File
var lastGGUFFile *gallery.File
foundPreferedQuant := false
foundPreferedMMprojQuant := false
for _, file := range details.HuggingFace.Files {
// Get the mmproj prefered quants
if strings.Contains(strings.ToLower(file.Path), "mmproj") {
lastMMProjFile = &gallery.File{
URI: file.URL,
Filename: filepath.Join("mmproj", filepath.Base(file.Path)),
SHA256: file.SHA256,
}
if slices.ContainsFunc(mmprojQuantsList, func(quant string) bool {
return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant))
}) {
cfg.Files = append(cfg.Files, *lastMMProjFile)
foundPreferedMMprojQuant = true
}
} else if strings.HasSuffix(strings.ToLower(file.Path), "gguf") {
lastGGUFFile = &gallery.File{
URI: file.URL,
Filename: filepath.Base(file.Path),
SHA256: file.SHA256,
}
// get the files of the prefered quants
if slices.ContainsFunc(quants, func(quant string) bool {
return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant))
}) {
foundPreferedQuant = true
cfg.Files = append(cfg.Files, *lastGGUFFile)
}
}
}
// Make sure to add at least one file if not already present (which is the latest one)
if lastMMProjFile != nil && !foundPreferedMMprojQuant {
if !slices.ContainsFunc(cfg.Files, func(f gallery.File) bool {
return f.Filename == lastMMProjFile.Filename
}) {
cfg.Files = append(cfg.Files, *lastMMProjFile)
}
}
if lastGGUFFile != nil && !foundPreferedQuant {
if !slices.ContainsFunc(cfg.Files, func(f gallery.File) bool {
return f.Filename == lastGGUFFile.Filename
}) {
cfg.Files = append(cfg.Files, *lastGGUFFile)
}
}
// Find first mmproj file and configure it in the config file
for _, file := range cfg.Files {
if !strings.Contains(strings.ToLower(file.Filename), "mmproj") {
continue
}
modelConfig.MMProj = file.Filename
break
}
// Find first non-mmproj file and configure it in the config file
for _, file := range cfg.Files {
if strings.Contains(strings.ToLower(file.Filename), "mmproj") {
continue
}
modelConfig.PredictionOptions = schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{
Model: file.Filename,
},
}
break
}
}
data, err := yaml.Marshal(modelConfig)
if err != nil {
return gallery.ModelConfig{}, err
}
cfg.ConfigFile = string(data)
return cfg, nil
}

View File

@@ -0,0 +1,131 @@
package importers_test
import (
"encoding/json"
"fmt"
"github.com/mudler/LocalAI/core/gallery/importers"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("LlamaCPPImporter", func() {
var importer *importers.LlamaCPPImporter
BeforeEach(func() {
importer = &importers.LlamaCPPImporter{}
})
Context("Match", func() {
It("should match when URI ends with .gguf", func() {
details := importers.Details{
URI: "https://example.com/model.gguf",
}
result := importer.Match(details)
Expect(result).To(BeTrue())
})
It("should match when backend preference is llama-cpp", func() {
preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
details := importers.Details{
URI: "https://example.com/model",
Preferences: preferences,
}
result := importer.Match(details)
Expect(result).To(BeTrue())
})
It("should not match when URI does not end with .gguf and no backend preference", func() {
details := importers.Details{
URI: "https://example.com/model.bin",
}
result := importer.Match(details)
Expect(result).To(BeFalse())
})
It("should not match when backend preference is different", func() {
preferences := json.RawMessage(`{"backend": "mlx"}`)
details := importers.Details{
URI: "https://example.com/model",
Preferences: preferences,
}
result := importer.Match(details)
Expect(result).To(BeFalse())
})
It("should return false when JSON preferences are invalid", func() {
preferences := json.RawMessage(`invalid json`)
details := importers.Details{
URI: "https://example.com/model.gguf",
Preferences: preferences,
}
// Invalid JSON causes Match to return false early
result := importer.Match(details)
Expect(result).To(BeFalse())
})
})
Context("Import", func() {
It("should import model config with default name and description", func() {
details := importers.Details{
URI: "https://example.com/my-model.gguf",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("my-model.gguf"))
Expect(modelConfig.Description).To(Equal("Imported from https://example.com/my-model.gguf"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
})
It("should import model config with custom name and description from preferences", func() {
preferences := json.RawMessage(`{"name": "custom-model", "description": "Custom description"}`)
details := importers.Details{
URI: "https://example.com/my-model.gguf",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("custom-model"))
Expect(modelConfig.Description).To(Equal("Custom description"))
Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
})
It("should handle invalid JSON preferences", func() {
preferences := json.RawMessage(`invalid json`)
details := importers.Details{
URI: "https://example.com/my-model.gguf",
Preferences: preferences,
}
_, err := importer.Import(details)
Expect(err).To(HaveOccurred())
})
It("should extract filename correctly from URI with path", func() {
details := importers.Details{
URI: "https://example.com/path/to/model.gguf",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/path/to/model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
Expect(modelConfig.Files[0].Filename).To(Equal("model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
})
})
})

View File

@@ -0,0 +1,94 @@
package importers
import (
"encoding/json"
"path/filepath"
"strings"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"go.yaml.in/yaml/v2"
)
var _ Importer = &MLXImporter{}
type MLXImporter struct{}
func (i *MLXImporter) Match(details Details) bool {
preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return false
}
preferencesMap := make(map[string]any)
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return false
}
b, ok := preferencesMap["backend"].(string)
if ok && b == "mlx" || b == "mlx-vlm" {
return true
}
// All https://huggingface.co/mlx-community/*
if strings.Contains(details.URI, "mlx-community/") {
return true
}
return false
}
func (i *MLXImporter) Import(details Details) (gallery.ModelConfig, error) {
preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return gallery.ModelConfig{}, err
}
preferencesMap := make(map[string]any)
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return gallery.ModelConfig{}, err
}
name, ok := preferencesMap["name"].(string)
if !ok {
name = filepath.Base(details.URI)
}
description, ok := preferencesMap["description"].(string)
if !ok {
description = "Imported from " + details.URI
}
backend := "mlx"
b, ok := preferencesMap["backend"].(string)
if ok {
backend = b
}
modelConfig := config.ModelConfig{
Name: name,
Description: description,
KnownUsecaseStrings: []string{"chat"},
Backend: backend,
PredictionOptions: schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{
Model: details.URI,
},
},
TemplateConfig: config.TemplateConfig{
UseTokenizerTemplate: true,
},
}
data, err := yaml.Marshal(modelConfig)
if err != nil {
return gallery.ModelConfig{}, err
}
return gallery.ModelConfig{
Name: name,
Description: description,
ConfigFile: string(data),
}, nil
}

View File

@@ -0,0 +1,147 @@
package importers_test
import (
"encoding/json"
"github.com/mudler/LocalAI/core/gallery/importers"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("MLXImporter", func() {
var importer *importers.MLXImporter
BeforeEach(func() {
importer = &importers.MLXImporter{}
})
Context("Match", func() {
It("should match when URI contains mlx-community/", func() {
details := importers.Details{
URI: "https://huggingface.co/mlx-community/test-model",
}
result := importer.Match(details)
Expect(result).To(BeTrue())
})
It("should match when backend preference is mlx", func() {
preferences := json.RawMessage(`{"backend": "mlx"}`)
details := importers.Details{
URI: "https://example.com/model",
Preferences: preferences,
}
result := importer.Match(details)
Expect(result).To(BeTrue())
})
It("should match when backend preference is mlx-vlm", func() {
preferences := json.RawMessage(`{"backend": "mlx-vlm"}`)
details := importers.Details{
URI: "https://example.com/model",
Preferences: preferences,
}
result := importer.Match(details)
Expect(result).To(BeTrue())
})
It("should not match when URI does not contain mlx-community/ and no backend preference", func() {
details := importers.Details{
URI: "https://huggingface.co/other-org/test-model",
}
result := importer.Match(details)
Expect(result).To(BeFalse())
})
It("should not match when backend preference is different", func() {
preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
details := importers.Details{
URI: "https://example.com/model",
Preferences: preferences,
}
result := importer.Match(details)
Expect(result).To(BeFalse())
})
It("should return false when JSON preferences are invalid", func() {
preferences := json.RawMessage(`invalid json`)
details := importers.Details{
URI: "https://huggingface.co/mlx-community/test-model",
Preferences: preferences,
}
// Invalid JSON causes Match to return false early
result := importer.Match(details)
Expect(result).To(BeFalse())
})
})
Context("Import", func() {
It("should import model config with default name and description", func() {
details := importers.Details{
URI: "https://huggingface.co/mlx-community/test-model",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("test-model"))
Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mlx-community/test-model"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: https://huggingface.co/mlx-community/test-model"))
})
It("should import model config with custom name and description from preferences", func() {
preferences := json.RawMessage(`{"name": "custom-mlx-model", "description": "Custom MLX description"}`)
details := importers.Details{
URI: "https://huggingface.co/mlx-community/test-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("custom-mlx-model"))
Expect(modelConfig.Description).To(Equal("Custom MLX description"))
})
It("should use custom backend from preferences", func() {
preferences := json.RawMessage(`{"backend": "mlx-vlm"}`)
details := importers.Details{
URI: "https://huggingface.co/mlx-community/test-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx-vlm"))
})
It("should handle invalid JSON preferences", func() {
preferences := json.RawMessage(`invalid json`)
details := importers.Details{
URI: "https://huggingface.co/mlx-community/test-model",
Preferences: preferences,
}
_, err := importer.Import(details)
Expect(err).To(HaveOccurred())
})
It("should extract filename correctly from URI with path", func() {
details := importers.Details{
URI: "https://huggingface.co/mlx-community/path/to/model",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("model"))
})
})
})

View File

@@ -200,11 +200,16 @@ func API(application *application.Application) (*fiber.App, error) {
requestExtractor := middleware.NewRequestExtractor(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterElevenLabsRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
// Create opcache for tracking UI operations (used by both UI and LocalAI routes)
var opcache *services.OpCache
if !application.ApplicationConfig().DisableWebUI {
opcache = services.NewOpCache(application.GalleryService())
}
routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
routes.RegisterOpenAIRoutes(router, requestExtractor, application)
if !application.ApplicationConfig().DisableWebUI {
// Create opcache for tracking UI operations
opcache := services.NewOpCache(application.GalleryService())
routes.RegisterUIAPIRoutes(router, application.ModelConfigLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
routes.RegisterUIRoutes(router, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
}

View File

@@ -76,7 +76,7 @@ func (mgs *BackendEndpointService) ApplyBackendEndpoint() func(c *fiber.Ctx) err
if err != nil {
return err
}
mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{
mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend, any]{
ID: uuid.String(),
GalleryElementName: input.ID,
Galleries: mgs.galleries,
@@ -95,7 +95,7 @@ func (mgs *BackendEndpointService) DeleteBackendEndpoint() func(c *fiber.Ctx) er
return func(c *fiber.Ctx) error {
backendName := c.Params("name")
mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{
mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend, any]{
Delete: true,
GalleryElementName: backendName,
Galleries: mgs.galleries,

View File

@@ -155,7 +155,7 @@ func EditModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applicati
}
// Reload configurations
if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath); err != nil {
if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil {
response := ModelResponse{
Success: false,
Error: "Failed to reload configurations: " + err.Error(),

View File

@@ -77,7 +77,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
if err != nil {
return err
}
mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Req: input.GalleryModel,
ID: uuid.String(),
GalleryElementName: input.ID,
@@ -98,7 +98,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
return func(c *fiber.Ctx) error {
modelName := c.Params("name")
mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Delete: true,
GalleryElementName: modelName,
}

View File

@@ -2,16 +2,72 @@ package localai
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/gallery/importers"
httpUtils "github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/utils"
"gopkg.in/yaml.v3"
)
// ImportModelURIEndpoint handles creating new model configurations from a URI
func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) fiber.Handler {
return func(c *fiber.Ctx) error {
input := new(schema.ImportModelRequest)
if err := c.BodyParser(input); err != nil {
return err
}
modelConfig, err := importers.DiscoverModelConfig(input.URI, input.Preferences)
if err != nil {
return fmt.Errorf("failed to discover model config: %w", err)
}
uuid, err := uuid.NewUUID()
if err != nil {
return err
}
// Determine gallery ID for tracking - use model name if available, otherwise use URI
galleryID := input.URI
if modelConfig.Name != "" {
galleryID = modelConfig.Name
}
// Register operation in opcache if available (for UI progress tracking)
if opcache != nil {
opcache.Set(galleryID, uuid.String())
}
galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Req: gallery.GalleryModel{
Overrides: map[string]interface{}{},
},
ID: uuid.String(),
GalleryElementName: galleryID,
GalleryElement: &modelConfig,
BackendGalleries: appConfig.BackendGalleries,
}
return c.JSON(schema.GalleryResponse{
ID: uuid.String(),
StatusURL: fmt.Sprintf("%smodels/jobs/%s", httpUtils.BaseURL(c), uuid.String()),
})
}
}
// ImportModelEndpoint handles creating new model configurations
func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) fiber.Handler {
return func(c *fiber.Ctx) error {

View File

@@ -2,9 +2,10 @@ package openai
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"net"
"time"
"github.com/gofiber/fiber/v2"
@@ -22,6 +23,59 @@ import (
"github.com/valyala/fasthttp"
)
// NOTE: this is a bad WORKAROUND! We should find a better way to handle this.
// Fasthttp doesn't support context cancellation from the caller
// for non-streaming requests, so we need to monitor the connection directly.
// Monitor connection for client disconnection during non-streaming requests
// We access the connection directly via c.Context().Conn() to monitor it
// during ComputeChoices execution, not after the response is sent
// see: https://github.com/mudler/LocalAI/pull/7187#issuecomment-3506720906
func handleConnectionCancellation(c *fiber.Ctx, cancelFunc func(), requestCtx context.Context) {
var conn net.Conn = c.Context().Conn()
if conn == nil {
return
}
go func() {
defer func() {
// Clear read deadline when goroutine exits
conn.SetReadDeadline(time.Time{})
}()
buf := make([]byte, 1)
// Use a short read deadline to periodically check if connection is closed
// Without a deadline, Read() would block indefinitely waiting for data
// that will never come (client is waiting for response, not sending more data)
ticker := time.NewTicker(100 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-requestCtx.Done():
// Request completed or was cancelled - exit goroutine
return
case <-ticker.C:
// Set a short deadline - if connection is closed, read will fail immediately
// If connection is open but no data, it will timeout and we check again
conn.SetReadDeadline(time.Now().Add(50 * time.Millisecond))
_, err := conn.Read(buf)
if err != nil {
// Check if it's a timeout (connection still open, just no data)
if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
// Timeout is expected - connection is still open, just no data to read
// Continue the loop to check again
continue
}
// Connection closed or other error - cancel the context to stop gRPC call
log.Debug().Msgf("Calling cancellation function")
cancelFunc()
return
}
}
}
}()
}
// ChatEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/chat/create
// @Summary Generate a chat completions for a given prompt and model.
// @Param request body schema.OpenAIRequest true "query params"
@@ -36,7 +90,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}},
Object: "chat.completion.chunk",
}
responses <- initialMessage
@@ -56,7 +110,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0, FinishReason: nil}},
Object: "chat.completion.chunk",
Usage: usage,
}
@@ -90,7 +144,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant"}, Index: 0, FinishReason: nil}},
Object: "chat.completion.chunk",
}
responses <- initialMessage
@@ -114,7 +168,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
ID: id,
Created: created,
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0, FinishReason: nil}},
Object: "chat.completion.chunk",
Usage: usage,
}
@@ -142,7 +196,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
},
},
},
}}},
},
Index: 0,
FinishReason: nil,
}},
Object: "chat.completion.chunk",
}
responses <- initialMessage
@@ -165,7 +222,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
},
},
},
}}},
},
Index: 0,
FinishReason: nil,
}},
Object: "chat.completion.chunk",
}
}
@@ -217,6 +277,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
}
// If we are using a response format, we need to generate a grammar for it
if config.ResponseFormatMap != nil {
d := schema.ChatCompletionResponseFormat{}
dat, err := json.Marshal(config.ResponseFormatMap)
@@ -260,6 +321,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}
switch {
// Generates grammar with internal's LocalAI engine
case (!config.FunctionsConfig.GrammarConfig.NoGrammar || strictMode) && shouldUseFn:
noActionGrammar := functions.Function{
Name: noActionName,
@@ -283,7 +345,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
funcs = funcs.Select(config.FunctionToCall())
}
// Update input grammar
// Update input grammar or json_schema based on use_llama_grammar option
jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
g, err := jsStruct.Grammar(config.FunctionsConfig.GrammarOptions()...)
if err == nil {
@@ -298,6 +360,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
} else {
log.Error().Err(err).Msg("Failed generating grammar")
}
default:
// Force picking one of the functions by the request
if config.FunctionToCall() != "" {
@@ -316,7 +379,7 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
// If we are using the tokenizer template, we don't need to process the messages
// unless we are processing functions
if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn {
if !config.TemplateConfig.UseTokenizerTemplate {
predInput = evaluator.TemplateMessages(*input, input.Messages, config, funcs, shouldUseFn)
log.Debug().Msgf("Prompt (after templating): %s", predInput)
@@ -355,6 +418,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
LOOP:
for {
select {
case <-input.Context.Done():
// Context was cancelled (client disconnected or request cancelled)
log.Debug().Msgf("Request context cancelled, stopping stream")
input.Cancel()
break LOOP
case ev := <-responses:
if len(ev.Choices) == 0 {
log.Debug().Msgf("No choices in the response, skipping")
@@ -364,11 +432,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
if len(ev.Choices[0].Delta.ToolCalls) > 0 {
toolsCalled = true
}
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
log.Debug().Msgf("Sending chunk: %s", buf.String())
_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
respData, err := json.Marshal(ev)
if err != nil {
log.Debug().Msgf("Failed to marshal response: %v", err)
input.Cancel()
continue
}
log.Debug().Msgf("Sending chunk: %s", string(respData))
_, err = fmt.Fprintf(w, "data: %s\n\n", string(respData))
if err != nil {
log.Debug().Msgf("Sending chunk failed: %v", err)
input.Cancel()
@@ -380,22 +451,28 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}
log.Error().Msgf("Stream ended with error: %v", err)
stopReason := FinishReasonStop
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
FinishReason: "stop",
FinishReason: &stopReason,
Index: 0,
Delta: &schema.Message{Content: "Internal error: " + err.Error()},
}},
Object: "chat.completion.chunk",
Usage: *usage,
}
respData, _ := json.Marshal(resp)
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
respData, marshalErr := json.Marshal(resp)
if marshalErr != nil {
log.Error().Msgf("Failed to marshal error response: %v", marshalErr)
// Send a simple error message as fallback
w.WriteString("data: {\"error\":\"Internal error\"}\n\n")
} else {
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
}
w.WriteString("data: [DONE]\n\n")
w.Flush()
@@ -403,11 +480,11 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}
}
finishReason := "stop"
finishReason := FinishReasonStop
if toolsCalled && len(input.Tools) > 0 {
finishReason = "tool_calls"
finishReason = FinishReasonToolCalls
} else if toolsCalled {
finishReason = "function_call"
finishReason = FinishReasonFunctionCall
}
resp := &schema.OpenAIResponse{
@@ -416,9 +493,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
FinishReason: finishReason,
FinishReason: &finishReason,
Index: 0,
Delta: &schema.Message{Content: &textContentToReturn},
Delta: &schema.Message{},
}},
Object: "chat.completion.chunk",
Usage: *usage,
@@ -439,7 +516,8 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
tokenCallback := func(s string, c *[]schema.Choice) {
if !shouldUseFn {
// no function is called, just reply and use stop as finish reason
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
stopReason := FinishReasonStop
*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
return
}
@@ -457,12 +535,14 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
return
}
stopReason := FinishReasonStop
*c = append(*c, schema.Choice{
FinishReason: "stop",
FinishReason: &stopReason,
Message: &schema.Message{Role: "assistant", Content: &result}})
default:
toolCallsReason := FinishReasonToolCalls
toolChoice := schema.Choice{
FinishReason: "tool_calls",
FinishReason: &toolCallsReason,
Message: &schema.Message{
Role: "assistant",
},
@@ -486,8 +566,9 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
)
} else {
// otherwise we return more choices directly (deprecated)
functionCallReason := FinishReasonFunctionCall
*c = append(*c, schema.Choice{
FinishReason: "function_call",
FinishReason: &functionCallReason,
Message: &schema.Message{
Role: "assistant",
Content: &textContentToReturn,
@@ -508,6 +589,10 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
}
// NOTE: this is a workaround as fasthttp
// context cancellation does not fire in non-streaming requests
handleConnectionCancellation(c, input.Cancel, input.Context)
result, tokenUsage, err := ComputeChoices(
input,
predInput,
@@ -597,7 +682,23 @@ func handleQuestion(config *config.ModelConfig, cl *config.ModelConfigLoader, in
audios = append(audios, m.StringAudios...)
}
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, config, cl, o, nil)
// Serialize tools and tool_choice to JSON strings
toolsJSON := ""
if len(input.Tools) > 0 {
toolsBytes, err := json.Marshal(input.Tools)
if err == nil {
toolsJSON = string(toolsBytes)
}
}
toolChoiceJSON := ""
if input.ToolsChoice != nil {
toolChoiceBytes, err := json.Marshal(input.ToolsChoice)
if err == nil {
toolChoiceJSON = string(toolChoiceBytes)
}
}
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, config, cl, o, nil, toolsJSON, toolChoiceJSON)
if err != nil {
log.Error().Err(err).Msg("model inference failed")
return "", err

View File

@@ -2,7 +2,6 @@ package openai
import (
"bufio"
"bytes"
"encoding/json"
"errors"
"fmt"
@@ -47,8 +46,9 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{
{
Index: 0,
Text: s,
Index: 0,
Text: s,
FinishReason: nil,
},
},
Object: "text_completion",
@@ -140,24 +140,49 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
log.Debug().Msgf("No choices in the response, skipping")
continue
}
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
respData, err := json.Marshal(ev)
if err != nil {
log.Debug().Msgf("Failed to marshal response: %v", err)
continue
}
log.Debug().Msgf("Sending chunk: %s", buf.String())
fmt.Fprintf(w, "data: %v\n", buf.String())
log.Debug().Msgf("Sending chunk: %s", string(respData))
fmt.Fprintf(w, "data: %s\n\n", string(respData))
w.Flush()
case err := <-ended:
if err == nil {
break LOOP
}
log.Error().Msgf("Stream ended with error: %v", err)
fmt.Fprintf(w, "data: %v\n", "Internal error: "+err.Error())
stopReason := FinishReasonStop
errorResp := schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model,
Choices: []schema.Choice{
{
Index: 0,
FinishReason: &stopReason,
Text: "Internal error: " + err.Error(),
},
},
Object: "text_completion",
}
errorData, marshalErr := json.Marshal(errorResp)
if marshalErr != nil {
log.Error().Msgf("Failed to marshal error response: %v", marshalErr)
// Send a simple error message as fallback
fmt.Fprintf(w, "data: {\"error\":\"Internal error\"}\n\n")
} else {
fmt.Fprintf(w, "data: %s\n\n", string(errorData))
}
w.Flush()
break LOOP
}
}
stopReason := FinishReasonStop
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
@@ -165,7 +190,7 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
Choices: []schema.Choice{
{
Index: 0,
FinishReason: "stop",
FinishReason: &stopReason,
},
},
Object: "text_completion",
@@ -197,7 +222,8 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
r, tokenUsage, err := ComputeChoices(
input, i, config, cl, appConfig, ml, func(s string, c *[]schema.Choice) {
*c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
stopReason := FinishReasonStop
*c = append(*c, schema.Choice{Text: s, FinishReason: &stopReason, Index: k})
}, nil)
if err != nil {
return err

View File

@@ -0,0 +1,8 @@
package openai
// Finish reason constants for OpenAI API responses
const (
FinishReasonStop = "stop"
FinishReasonToolCalls = "tool_calls"
FinishReasonFunctionCall = "function_call"
)

View File

@@ -1,6 +1,8 @@
package openai
import (
"encoding/json"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
@@ -37,8 +39,25 @@ func ComputeChoices(
audios = append(audios, m.StringAudios...)
}
// Serialize tools and tool_choice to JSON strings
toolsJSON := ""
if len(req.Tools) > 0 {
toolsBytes, err := json.Marshal(req.Tools)
if err == nil {
toolsJSON = string(toolsBytes)
}
}
toolChoiceJSON := ""
if req.ToolsChoice != nil {
toolChoiceBytes, err := json.Marshal(req.ToolsChoice)
if err == nil {
toolChoiceJSON = string(toolChoiceBytes)
}
}
// get the model function to call for the result
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, config, bcl, o, tokenCallback)
predFunc, err := backend.ModelInference(
req.Context, predInput, req.Messages, images, videos, audios, loader, config, bcl, o, tokenCallback, toolsJSON, toolChoiceJSON)
if err != nil {
return result, backend.TokenUsage{}, err
}

View File

@@ -1,6 +1,7 @@
package openai
import (
"context"
"encoding/json"
"errors"
"fmt"
@@ -50,12 +51,15 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
}
// Get MCP config from model config
remote, stdio := config.MCP.MCPConfigFromYAML()
remote, stdio, err := config.MCP.MCPConfigFromYAML()
if err != nil {
return fmt.Errorf("failed to get MCP config: %w", err)
}
// Check if we have tools in cache, or we have to have an initial connection
sessions, err := mcpTools.SessionsFromMCPConfig(config.Name, remote, stdio)
if err != nil {
return err
return fmt.Errorf("failed to get MCP sessions: %w", err)
}
if len(sessions) == 0 {
@@ -73,6 +77,10 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
if appConfig.ApiKeys != nil {
apiKey = appConfig.ApiKeys[0]
}
ctxWithCancellation, cancel := context.WithCancel(ctx)
defer cancel()
handleConnectionCancellation(c, cancel, ctxWithCancellation)
// TODO: instead of connecting to the API, we should just wire this internally
// and act like completion.go.
// We can do this as cogito expects an interface and we can create one that
@@ -83,7 +91,7 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
cogito.WithStatusCallback(func(s string) {
log.Debug().Msgf("[model agent] [model: %s] Status: %s", config.Name, s)
}),
cogito.WithContext(ctx),
cogito.WithContext(ctxWithCancellation),
cogito.WithMCPs(sessions...),
cogito.WithIterations(3), // default to 3 iterations
cogito.WithMaxAttempts(3), // default to 3 attempts

View File

@@ -1072,7 +1072,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
result, tokenUsage, err := ComputeChoices(input, prompt, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
if !shouldUseFn {
// no function is called, just reply and use stop as finish reason
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
stopReason := FinishReasonStop
*c = append(*c, schema.Choice{FinishReason: &stopReason, Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
return
}
@@ -1099,7 +1100,8 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
}
if len(input.Tools) > 0 {
toolChoice.FinishReason = "tool_calls"
toolCallsReason := FinishReasonToolCalls
toolChoice.FinishReason = &toolCallsReason
}
for _, ss := range results {
@@ -1120,8 +1122,9 @@ func processTextResponse(config *config.ModelConfig, session *Session, prompt st
)
} else {
// otherwise we return more choices directly
functionCallReason := FinishReasonFunctionCall
*c = append(*c, schema.Choice{
FinishReason: "function_call",
FinishReason: &functionCallReason,
Message: &schema.Message{
Role: "assistant",
Content: &textContentToReturn,

View File

@@ -15,6 +15,7 @@ import (
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/valyala/fasthttp"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
@@ -161,7 +162,18 @@ func (re *RequestExtractor) SetOpenAIRequest(ctx *fiber.Ctx) error {
correlationID := ctx.Get("X-Correlation-ID", uuid.New().String())
ctx.Set("X-Correlation-ID", correlationID)
//c1, cancel := context.WithCancel(re.applicationConfig.Context)
// Use the application context as parent to ensure cancellation on app shutdown
// We'll monitor the Fiber context separately and cancel our context when the request is canceled
c1, cancel := context.WithCancel(re.applicationConfig.Context)
// Monitor the Fiber context and cancel our context when it's canceled
// This ensures we respect request cancellation without causing panics
go func(fiberCtx *fasthttp.RequestCtx) {
if fiberCtx != nil {
<-fiberCtx.Done()
cancel()
}
}(ctx.Context())
// Add the correlation ID to the new context
ctxWithCorrelationID := context.WithValue(c1, CorrelationIDKey, correlationID)

View File

@@ -18,7 +18,8 @@ func RegisterLocalAIRoutes(router *fiber.App,
cl *config.ModelConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
galleryService *services.GalleryService) {
galleryService *services.GalleryService,
opcache *services.OpCache) {
router.Get("/swagger/*", swagger.HandlerDefault) // default
@@ -57,6 +58,9 @@ func RegisterLocalAIRoutes(router *fiber.App,
// Custom model import endpoint
router.Post("/models/import", localai.ImportModelEndpoint(cl, appConfig))
// URI model import endpoint
router.Post("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService, opcache))
// Custom model edit endpoint
router.Post("/models/edit/:name", localai.EditModelEndpoint(cl, appConfig))

View File

@@ -91,11 +91,15 @@ func RegisterUIRoutes(app *fiber.App,
}
title := "LocalAI - Chat"
var modelContextSize *int
for _, b := range modelConfigs {
if b.HasUsecases(config.FLAG_CHAT) {
modelThatCanBeUsed = b.Name
title = "LocalAI - Chat with " + modelThatCanBeUsed
if b.LLMConfig.ContextSize != nil {
modelContextSize = b.LLMConfig.ContextSize
}
break
}
}
@@ -107,6 +111,7 @@ func RegisterUIRoutes(app *fiber.App,
"GalleryConfig": galleryConfigs,
"ModelsConfig": modelConfigs,
"Model": modelThatCanBeUsed,
"ContextSize": modelContextSize,
"Version": internal.PrintableVersion(),
}
@@ -120,6 +125,8 @@ func RegisterUIRoutes(app *fiber.App,
modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY)
galleryConfigs := map[string]*gallery.ModelConfig{}
modelName := c.Params("model")
var modelContextSize *int
for _, m := range modelConfigs {
cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name)
@@ -127,15 +134,19 @@ func RegisterUIRoutes(app *fiber.App,
continue
}
galleryConfigs[m.Name] = cfg
if m.Name == modelName && m.LLMConfig.ContextSize != nil {
modelContextSize = m.LLMConfig.ContextSize
}
}
summary := fiber.Map{
"Title": "LocalAI - Chat with " + c.Params("model"),
"Title": "LocalAI - Chat with " + modelName,
"BaseURL": utils.BaseURL(c),
"ModelsConfig": modelConfigs,
"GalleryConfig": galleryConfigs,
"ModelsWithoutConfig": modelsWithoutConfig,
"Model": c.Params("model"),
"Model": modelName,
"ContextSize": modelContextSize,
"Version": internal.PrintableVersion(),
}

View File

@@ -200,6 +200,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
"jobID": jobID,
"isDeletion": isDeletionOp,
"trustRemoteCode": trustRemoteCodeExists,
"additionalFiles": m.AdditionalFiles,
})
}
@@ -247,7 +248,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
uid := id.String()
opcache.Set(galleryID, uid)
op := services.GalleryOp[gallery.GalleryModel]{
op := services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
ID: uid,
GalleryElementName: galleryID,
Galleries: appConfig.Galleries,
@@ -290,7 +291,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
opcache.Set(galleryID, uid)
op := services.GalleryOp[gallery.GalleryModel]{
op := services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
ID: uid,
Delete: true,
GalleryElementName: galleryName,
@@ -525,7 +526,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
uid := id.String()
opcache.Set(backendID, uid)
op := services.GalleryOp[gallery.GalleryBackend]{
op := services.GalleryOp[gallery.GalleryBackend, any]{
ID: uid,
GalleryElementName: backendID,
Galleries: appConfig.BackendGalleries,
@@ -567,7 +568,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
opcache.Set(backendID, uid)
op := services.GalleryOp[gallery.GalleryBackend]{
op := services.GalleryOp[gallery.GalleryBackend, any]{
ID: uid,
Delete: true,
GalleryElementName: backendName,

View File

@@ -27,21 +27,85 @@ SOFTWARE.
*/
// Global variable to store the current AbortController
let currentAbortController = null;
let currentReader = null;
let requestStartTime = null;
let tokensReceived = 0;
let tokensPerSecondInterval = null;
let lastTokensPerSecond = null; // Store the last calculated rate
function toggleLoader(show) {
const loader = document.getElementById('loader');
const sendButton = document.getElementById('send-button');
const stopButton = document.getElementById('stop-button');
const headerLoadingIndicator = document.getElementById('header-loading-indicator');
const tokensPerSecondDisplay = document.getElementById('tokens-per-second');
if (show) {
loader.style.display = 'block';
sendButton.style.display = 'none';
document.getElementById("input").disabled = true;
stopButton.style.display = 'block';
if (headerLoadingIndicator) headerLoadingIndicator.style.display = 'block';
// Reset token tracking
requestStartTime = Date.now();
tokensReceived = 0;
// Start updating tokens/second display
if (tokensPerSecondDisplay) {
tokensPerSecondDisplay.textContent = '-';
updateTokensPerSecond();
tokensPerSecondInterval = setInterval(updateTokensPerSecond, 500); // Update every 500ms
}
} else {
document.getElementById("input").disabled = false;
loader.style.display = 'none';
sendButton.style.display = 'block';
stopButton.style.display = 'none';
if (headerLoadingIndicator) headerLoadingIndicator.style.display = 'none';
// Stop updating but keep the last value visible
if (tokensPerSecondInterval) {
clearInterval(tokensPerSecondInterval);
tokensPerSecondInterval = null;
}
// Keep the last calculated rate visible
if (tokensPerSecondDisplay && lastTokensPerSecond !== null) {
tokensPerSecondDisplay.textContent = lastTokensPerSecond;
}
currentAbortController = null;
currentReader = null;
requestStartTime = null;
tokensReceived = 0;
}
}
function updateTokensPerSecond() {
const tokensPerSecondDisplay = document.getElementById('tokens-per-second');
if (!tokensPerSecondDisplay || !requestStartTime) return;
const elapsedSeconds = (Date.now() - requestStartTime) / 1000;
if (elapsedSeconds > 0 && tokensReceived > 0) {
const rate = tokensReceived / elapsedSeconds;
const formattedRate = `${rate.toFixed(1)} tokens/s`;
tokensPerSecondDisplay.textContent = formattedRate;
lastTokensPerSecond = formattedRate; // Store the last calculated rate
} else if (elapsedSeconds > 0) {
tokensPerSecondDisplay.textContent = '-';
}
}
function stopRequest() {
if (currentAbortController) {
currentAbortController.abort();
currentAbortController = null;
}
if (currentReader) {
currentReader.cancel();
currentReader = null;
}
toggleLoader(false);
Alpine.store("chat").add(
"assistant",
`<span class='error'>Request cancelled by user</span>`,
);
}
function processThinkingTags(content) {
const thinkingRegex = /<thinking>(.*?)<\/thinking>|<think>(.*?)<\/think>/gs;
const parts = content.split(thinkingRegex);
@@ -149,9 +213,30 @@ function readInputFile() {
function submitPrompt(event) {
event.preventDefault();
const input = document.getElementById("input");
if (!input) return;
const input = document.getElementById("input").value;
let fullInput = input;
const inputValue = input.value;
if (!inputValue.trim()) return; // Don't send empty messages
// If already processing, abort the current request and send the new one
if (currentAbortController || currentReader) {
// Abort current request
stopRequest();
// Small delay to ensure cleanup completes
setTimeout(() => {
// Continue with new request
processAndSendMessage(inputValue);
}, 100);
return;
}
processAndSendMessage(inputValue);
}
function processAndSendMessage(inputValue) {
let fullInput = inputValue;
// If there are file contents, append them to the input for the LLM
if (fileContents.length > 0) {
@@ -162,7 +247,7 @@ function submitPrompt(event) {
}
// Show file icons in chat if there are files
let displayContent = input;
let displayContent = inputValue;
if (currentFileNames.length > 0) {
displayContent += "\n\n";
currentFileNames.forEach(fileName => {
@@ -179,9 +264,15 @@ function submitPrompt(event) {
history[history.length - 1].content = fullInput;
}
document.getElementById("input").value = "";
const input = document.getElementById("input");
if (input) input.value = "";
const systemPrompt = localStorage.getItem("system_prompt");
Alpine.nextTick(() => { document.getElementById('messages').scrollIntoView(false); });
// Reset token tracking before starting new request
requestStartTime = Date.now();
tokensReceived = 0;
promptGPT(systemPrompt, fullInput);
// Reset file contents and names after sending
@@ -220,6 +311,12 @@ function readInputAudio() {
async function promptGPT(systemPrompt, input) {
const model = document.getElementById("chat-model").value;
const mcpMode = Alpine.store("chat").mcpMode;
// Reset current request usage tracking for new request
if (Alpine.store("chat")) {
Alpine.store("chat").tokenUsage.currentRequest = null;
}
toggleLoader(true);
messages = Alpine.store("chat").messages();
@@ -295,8 +392,9 @@ async function promptGPT(systemPrompt, input) {
let response;
try {
// Create AbortController for timeout handling
// Create AbortController for timeout handling and stop button
const controller = new AbortController();
currentAbortController = controller; // Store globally so stop button can abort it
const timeoutId = setTimeout(() => controller.abort(), mcpMode ? 300000 : 30000); // 5 minutes for MCP, 30 seconds for regular
response = await fetch(endpoint, {
@@ -311,11 +409,20 @@ async function promptGPT(systemPrompt, input) {
clearTimeout(timeoutId);
} catch (error) {
// Don't show error if request was aborted by user (stop button)
if (error.name === 'AbortError') {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Request timeout: MCP processing is taking longer than expected. Please try again.</span>`,
);
// Check if this was a user-initiated abort (stop button was clicked)
// If currentAbortController is null, it means stopRequest() was called and already handled the UI
if (!currentAbortController) {
// User clicked stop button - error message already shown by stopRequest()
return;
} else {
// Timeout error (controller was aborted by timeout, not user)
Alpine.store("chat").add(
"assistant",
`<span class='error'>Request timeout: MCP processing is taking longer than expected. Please try again.</span>`,
);
}
} else {
Alpine.store("chat").add(
"assistant",
@@ -323,6 +430,7 @@ async function promptGPT(systemPrompt, input) {
);
}
toggleLoader(false);
currentAbortController = null;
return;
}
@@ -332,6 +440,7 @@ async function promptGPT(systemPrompt, input) {
`<span class='error'>Error: POST ${endpoint} ${response.status}</span>`,
);
toggleLoader(false);
currentAbortController = null;
return;
}
@@ -339,10 +448,35 @@ async function promptGPT(systemPrompt, input) {
// Handle MCP non-streaming response
try {
const data = await response.json();
// MCP endpoint returns content in choices[0].text, not choices[0].message.content
const content = data.choices[0]?.text || "";
// Update token usage if present
if (data.usage) {
Alpine.store("chat").updateTokenUsage(data.usage);
}
// MCP endpoint returns content in choices[0].message.content (chat completion format)
// Fallback to choices[0].text for backward compatibility (completion format)
const content = data.choices[0]?.message?.content || data.choices[0]?.text || "";
if (!content && (!data.choices || data.choices.length === 0)) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Empty response from MCP endpoint</span>`,
);
toggleLoader(false);
return;
}
if (content) {
// Count tokens for rate calculation (MCP mode - full content at once)
// Prefer actual token count from API if available
if (data.usage && data.usage.completion_tokens) {
tokensReceived = data.usage.completion_tokens;
} else {
tokensReceived += Math.ceil(content.length / 4);
}
updateTokensPerSecond();
// Process thinking tags using shared function
const { regularContent, thinkingContent } = processThinkingTags(content);
@@ -360,10 +494,15 @@ async function promptGPT(systemPrompt, input) {
// Highlight all code blocks
hljs.highlightAll();
} catch (error) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to parse MCP response</span>`,
);
// Don't show error if request was aborted by user
if (error.name !== 'AbortError' || currentAbortController) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to parse MCP response</span>`,
);
}
} finally {
currentAbortController = null;
}
} else {
// Handle regular streaming response
@@ -376,13 +515,20 @@ async function promptGPT(systemPrompt, input) {
"assistant",
`<span class='error'>Error: Failed to decode API response</span>`,
);
toggleLoader(false);
return;
}
// Store reader globally so stop button can cancel it
currentReader = reader;
// Function to add content to the chat and handle DOM updates efficiently
const addToChat = (token) => {
const chatStore = Alpine.store("chat");
chatStore.add("assistant", token);
// Count tokens for rate calculation (rough estimate: count characters/4)
tokensReceived += Math.ceil(token.length / 4);
updateTokensPerSecond();
// Efficiently scroll into view without triggering multiple reflows
// const messages = document.getElementById('messages');
// messages.scrollTop = messages.scrollHeight;
@@ -413,6 +559,12 @@ async function promptGPT(systemPrompt, input) {
if (line.startsWith("data: ")) {
try {
const jsonData = JSON.parse(line.substring(6));
// Update token usage if present
if (jsonData.usage) {
Alpine.store("chat").updateTokenUsage(jsonData.usage);
}
const token = jsonData.choices[0].delta.content;
if (token) {
@@ -437,6 +589,9 @@ async function promptGPT(systemPrompt, input) {
// Handle content based on thinking state
if (isThinking) {
thinkingContent += token;
// Count tokens for rate calculation
tokensReceived += Math.ceil(token.length / 4);
updateTokensPerSecond();
// Update the last thinking message or create a new one
if (lastThinkingMessageIndex === -1) {
// Create new thinking message
@@ -479,13 +634,20 @@ async function promptGPT(systemPrompt, input) {
// Highlight all code blocks once at the end
hljs.highlightAll();
} catch (error) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to process stream</span>`,
);
// Don't show error if request was aborted by user
if (error.name !== 'AbortError' || !currentAbortController) {
Alpine.store("chat").add(
"assistant",
`<span class='error'>Error: Failed to process stream</span>`,
);
}
} finally {
// Perform any cleanup if necessary
reader.releaseLock();
if (reader) {
reader.releaseLock();
}
currentReader = null;
currentAbortController = null;
}
}
@@ -518,14 +680,71 @@ marked.setOptions({
},
});
// Alpine store is now initialized in chat.html inline script to ensure it's available before Alpine processes the DOM
// Only initialize if not already initialized (to avoid duplicate initialization)
document.addEventListener("alpine:init", () => {
Alpine.store("chat", {
// Check if store already exists (initialized in chat.html)
if (!Alpine.store("chat")) {
// Fallback initialization (should not be needed if chat.html loads correctly)
Alpine.store("chat", {
history: [],
languages: [undefined],
systemPrompt: "",
mcpMode: false,
contextSize: null,
tokenUsage: {
promptTokens: 0,
completionTokens: 0,
totalTokens: 0,
currentRequest: null
},
clear() {
this.history.length = 0;
this.tokenUsage = {
promptTokens: 0,
completionTokens: 0,
totalTokens: 0,
currentRequest: null
};
},
updateTokenUsage(usage) {
// Usage values in streaming responses are cumulative totals for the current request
// We track session totals separately and only update when we see new (higher) values
if (usage) {
const currentRequest = this.tokenUsage.currentRequest || {
promptTokens: 0,
completionTokens: 0,
totalTokens: 0
};
// Check if this is a new/updated usage (values increased)
const isNewUsage =
(usage.prompt_tokens !== undefined && usage.prompt_tokens > currentRequest.promptTokens) ||
(usage.completion_tokens !== undefined && usage.completion_tokens > currentRequest.completionTokens) ||
(usage.total_tokens !== undefined && usage.total_tokens > currentRequest.totalTokens);
if (isNewUsage) {
// Update session totals: subtract old request usage, add new
this.tokenUsage.promptTokens = this.tokenUsage.promptTokens - currentRequest.promptTokens + (usage.prompt_tokens || 0);
this.tokenUsage.completionTokens = this.tokenUsage.completionTokens - currentRequest.completionTokens + (usage.completion_tokens || 0);
this.tokenUsage.totalTokens = this.tokenUsage.totalTokens - currentRequest.totalTokens + (usage.total_tokens || 0);
// Store current request usage
this.tokenUsage.currentRequest = {
promptTokens: usage.prompt_tokens || 0,
completionTokens: usage.completion_tokens || 0,
totalTokens: usage.total_tokens || 0
};
}
}
},
getRemainingTokens() {
if (!this.contextSize) return null;
return Math.max(0, this.contextSize - this.tokenUsage.totalTokens);
},
getContextUsagePercent() {
if (!this.contextSize) return null;
return Math.min(100, (this.tokenUsage.totalTokens / this.contextSize) * 100);
},
add(role, content, image, audio) {
const N = this.history.length - 1;
@@ -590,5 +809,6 @@ document.addEventListener("alpine:init", () => {
audio: message.audio,
}));
},
});
});
}
});

View File

@@ -305,7 +305,7 @@
class="rounded-t-lg max-h-48 max-w-96 object-cover mt-3"
loading="lazy">
</div>
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400" x-text="selectedBackend?.description"></p>
<div class="text-base leading-relaxed text-gray-500 dark:text-gray-400 break-words max-w-full markdown-content" x-html="renderMarkdown(selectedBackend?.description)"></div>
<template x-if="selectedBackend?.tags && selectedBackend.tags.length > 0">
<div>
<p class="text-sm mb-3 font-semibold text-gray-900 dark:text-white">Tags</p>
@@ -439,6 +439,42 @@ tbody tr:last-child td:first-child {
tbody tr:last-child td:last-child {
border-bottom-right-radius: 1rem;
}
/* Markdown content overflow handling */
.markdown-content {
word-wrap: break-word;
overflow-wrap: anywhere;
max-width: 100%;
}
.markdown-content pre {
overflow-x: auto;
max-width: 100%;
white-space: pre-wrap;
word-wrap: break-word;
}
.markdown-content code {
word-wrap: break-word;
overflow-wrap: break-word;
}
.markdown-content pre code {
white-space: pre;
overflow-x: auto;
display: block;
}
.markdown-content table {
max-width: 100%;
overflow-x: auto;
display: block;
}
.markdown-content img {
max-width: 100%;
height: auto;
}
</style>
<script>
@@ -599,6 +635,20 @@ function backendsGallery() {
}
},
renderMarkdown(text) {
if (!text) return '';
try {
if (typeof marked === 'undefined' || typeof DOMPurify === 'undefined') {
return text; // Return plain text if libraries not loaded
}
const html = marked.parse(text);
return DOMPurify.sanitize(html);
} catch (error) {
console.error('Error rendering markdown:', error);
return text;
}
},
openModal(backend) {
this.selectedBackend = backend;
},

View File

@@ -28,12 +28,167 @@ SOFTWARE.
<!doctype html>
<html lang="en">
{{template "views/partials/head" .}}
<script defer src="static/chat.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
<script>
// Initialize PDF.js worker
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
</script>
<script>
// Initialize Alpine store - must run before Alpine processes DOM
// Get context size from template
var __chatContextSize = null;
{{ if .ContextSize }}
__chatContextSize = {{ .ContextSize }};
{{ end }}
// Function to initialize store
function __initChatStore() {
if (!window.Alpine) return;
if (Alpine.store("chat")) {
Alpine.store("chat").contextSize = __chatContextSize;
return;
}
Alpine.store("chat", {
history: [],
languages: [undefined],
systemPrompt: "",
mcpMode: false,
contextSize: __chatContextSize,
tokenUsage: {
promptTokens: 0,
completionTokens: 0,
totalTokens: 0,
currentRequest: null
},
clear() {
this.history.length = 0;
this.tokenUsage = {
promptTokens: 0,
completionTokens: 0,
totalTokens: 0,
currentRequest: null
};
},
updateTokenUsage(usage) {
// Usage values in streaming responses are cumulative totals for the current request
// We track session totals separately and only update when we see new (higher) values
if (usage) {
const currentRequest = this.tokenUsage.currentRequest || {
promptTokens: 0,
completionTokens: 0,
totalTokens: 0
};
// Check if this is a new/updated usage (values increased)
const isNewUsage =
(usage.prompt_tokens !== undefined && usage.prompt_tokens > currentRequest.promptTokens) ||
(usage.completion_tokens !== undefined && usage.completion_tokens > currentRequest.completionTokens) ||
(usage.total_tokens !== undefined && usage.total_tokens > currentRequest.totalTokens);
if (isNewUsage) {
// Update session totals: subtract old request usage, add new
this.tokenUsage.promptTokens = this.tokenUsage.promptTokens - currentRequest.promptTokens + (usage.prompt_tokens || 0);
this.tokenUsage.completionTokens = this.tokenUsage.completionTokens - currentRequest.completionTokens + (usage.completion_tokens || 0);
this.tokenUsage.totalTokens = this.tokenUsage.totalTokens - currentRequest.totalTokens + (usage.total_tokens || 0);
// Store current request usage
this.tokenUsage.currentRequest = {
promptTokens: usage.prompt_tokens || 0,
completionTokens: usage.completion_tokens || 0,
totalTokens: usage.total_tokens || 0
};
}
}
},
getRemainingTokens() {
if (!this.contextSize) return null;
return Math.max(0, this.contextSize - this.tokenUsage.totalTokens);
},
getContextUsagePercent() {
if (!this.contextSize) return null;
return Math.min(100, (this.tokenUsage.totalTokens / this.contextSize) * 100);
},
add(role, content, image, audio) {
const N = this.history.length - 1;
// For thinking messages, always create a new message
if (role === "thinking") {
let c = "";
const lines = content.split("\n");
lines.forEach((line) => {
c += DOMPurify.sanitize(marked.parse(line));
});
this.history.push({ role, content, html: c, image, audio });
}
// For other messages, merge if same role
else if (this.history.length && this.history[N].role === role) {
this.history[N].content += content;
this.history[N].html = DOMPurify.sanitize(
marked.parse(this.history[N].content)
);
// Merge new images and audio with existing ones
if (image && image.length > 0) {
this.history[N].image = [...(this.history[N].image || []), ...image];
}
if (audio && audio.length > 0) {
this.history[N].audio = [...(this.history[N].audio || []), ...audio];
}
} else {
let c = "";
const lines = content.split("\n");
lines.forEach((line) => {
c += DOMPurify.sanitize(marked.parse(line));
});
this.history.push({
role,
content,
html: c,
image: image || [],
audio: audio || []
});
}
document.getElementById('messages').scrollIntoView(false);
const parser = new DOMParser();
const html = parser.parseFromString(
this.history[this.history.length - 1].html,
"text/html"
);
const code = html.querySelectorAll("pre code");
if (!code.length) return;
code.forEach((el) => {
const language = el.className.split("language-")[1];
if (this.languages.includes(language)) return;
const script = document.createElement("script");
script.src = `https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/languages/${language}.min.js`;
document.head.appendChild(script);
this.languages.push(language);
});
},
messages() {
return this.history.map((message) => ({
role: message.role,
content: message.content,
image: message.image,
audio: message.audio,
}));
},
});
}
// Register listener immediately (before Alpine loads)
document.addEventListener("alpine:init", __initChatStore);
// Also try immediately in case Alpine is already loaded
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', function() {
if (window.Alpine) __initChatStore();
});
} else {
// DOM already loaded, try immediately
if (window.Alpine) __initChatStore();
}
</script>
<script defer src="static/chat.js"></script>
{{ $allGalleryConfigs:=.GalleryConfig }}
{{ $model:=.Model}}
<body class="bg-[#101827] text-[#E5E7EB] flex flex-col h-screen" x-data="{ sidebarOpen: true }">
@@ -42,13 +197,22 @@ SOFTWARE.
<!-- Main container with sidebar toggle -->
<div class="flex flex-1 overflow-hidden relative">
<!-- Sidebar -->
<div
<div
class="sidebar bg-[#1E293B] fixed top-16 bottom-0 left-0 w-64 transform transition-transform duration-300 ease-in-out z-30 border-r border-[#101827] overflow-y-auto"
:class="sidebarOpen ? 'translate-x-0' : '-translate-x-full'">
<div class="p-4 flex justify-between items-center border-b border-[#101827]">
<h2 class="text-lg font-semibold text-[#E5E7EB]">Chat Settings</h2>
<button
<div class="flex items-center gap-2">
<h2 class="text-lg font-semibold text-[#E5E7EB]">Chat Settings</h2>
<a
href="https://localai.io/features/text-generation/"
target="_blank"
class="text-[#94A3B8] hover:text-[#38BDF8] transition-colors"
title="Documentation">
<i class="fas fa-book text-sm"></i>
</a>
</div>
<button
@click="sidebarOpen = false"
class="text-[#94A3B8] hover:text-[#E5E7EB] focus:outline-none">
<i class="fa-solid fa-times"></i>
@@ -66,7 +230,7 @@ SOFTWARE.
onchange="window.location = this.value"
>
<option value="" disabled class="text-[#94A3B8]">Select a model</option>
{{ range .ModelsConfig }}
{{ $cfg := . }}
{{ range .KnownUsecaseStrings }}
@@ -101,61 +265,98 @@ SOFTWARE.
<div class="flex items-center">
{{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg w-8 h-8 mr-2">{{end}}
<h3 class="text-md font-medium">{{ $model }}</h3>
</div>
<button data-twe-ripple-init data-twe-ripple-color="light" class="w-full text-left flex items-center px-3 py-2 text-xs rounded text-[#E5E7EB] bg-[#101827] hover:bg-[#101827]/80 border border-[#38BDF8]/20 transition-colors" data-modal-target="model-info-modal" data-modal-toggle="model-info-modal">
<i class="fas fa-info-circle mr-2 text-[#38BDF8]"></i>
Model Information
</button>
</div>
{{ end }}
{{ end }}
<div x-data="{ activeTab: 'actions' }" class="space-y-4">
<!-- Tab navigation -->
<div class="flex border-b border-[#101827]">
<button
@click="activeTab = 'actions'"
:class="activeTab === 'actions' ? 'border-b-2 border-[#38BDF8] text-[#E5E7EB]' : 'text-[#94A3B8] hover:text-[#E5E7EB]'"
class="py-2 px-4 text-sm font-medium">
Actions
data-twe-ripple-init
data-twe-ripple-color="light"
class="ml-2 text-[#94A3B8] hover:text-[#38BDF8] transition-colors"
data-modal-target="model-info-modal"
data-modal-toggle="model-info-modal"
title="Model Information">
<i class="fas fa-info-circle text-sm"></i>
</button>
<button
@click="activeTab = 'settings'"
:class="activeTab === 'settings' ? 'border-b-2 border-[#38BDF8] text-[#E5E7EB]' : 'text-[#94A3B8] hover:text-[#E5E7EB]'"
class="py-2 px-4 text-sm font-medium">
Settings
</button>
</div>
<!-- Actions tab -->
<div x-show="activeTab === 'actions'" class="space-y-3">
<button
@click="$store.chat.clear()"
id="clear"
title="Clear chat history"
class="w-full flex items-center px-3 py-2 text-sm rounded text-[#E5E7EB] bg-[#101827] hover:bg-[#101827]/80 border border-[#1E293B] transition-colors"
>
<i class="fa-solid fa-trash-can mr-2"></i> Clear chat
class="ml-2 text-[#94A3B8] hover:text-[#38BDF8] transition-colors">
<i class="fa-solid fa-trash-can text-sm"></i>
</button>
<a
href="https://localai.io/features/text-generation/"
target="_blank"
class="w-full flex items-center px-3 py-2 text-sm rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors"
>
<i class="fas fa-book mr-2"></i> Documentation
</a>
<a
href="browse?term={{.Model}}"
class="w-full flex items-center px-3 py-2 text-sm rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors"
>
<i class="fas fa-brain mr-2"></i> Browse Model
</a>
</div>
</div>
{{ end }}
{{ end }}
<div x-data="{ showPromptForm: false }" class="space-y-3">
<!-- Token Usage Statistics -->
<div class="bg-[#1E293B] border border-[#38BDF8]/20 rounded-lg p-3 space-y-2">
<div class="flex items-center justify-between mb-2">
<h4 class="text-sm font-semibold text-[#E5E7EB] flex items-center">
<i class="fas fa-chart-line mr-2 text-[#38BDF8]"></i>
Token Usage
</h4>
</div>
<div class="space-y-1.5 text-xs">
<div class="flex justify-between text-[#94A3B8]">
<span>Prompt:</span>
<span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.promptTokens)"></span>
</div>
<div class="flex justify-between text-[#94A3B8]">
<span>Completion:</span>
<span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.completionTokens)"></span>
</div>
<div class="flex justify-between text-[#94A3B8] border-t border-[#101827] pt-1.5">
<span class="font-semibold text-[#38BDF8]">Total:</span>
<span class="text-[#E5E7EB] font-bold" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.totalTokens)"></span>
</div>
</div>
</div>
<!-- Context Size Indicator -->
<template x-if="$store.chat.contextSize && $store.chat.contextSize > 0">
<div class="bg-[#1E293B] border border-[#38BDF8]/20 rounded-lg p-3 space-y-2">
<div class="flex items-center justify-between mb-2">
<h4 class="text-sm font-semibold text-[#E5E7EB] flex items-center">
<i class="fas fa-database mr-2 text-[#38BDF8]"></i>
Context Window
</h4>
</div>
<div class="space-y-2">
<div class="flex justify-between text-xs text-[#94A3B8] mb-1">
<span>Used / Available</span>
<span class="text-[#E5E7EB] font-medium">
<span x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.totalTokens)"></span>
/
<span x-text="new Intl.NumberFormat().format($store.chat.contextSize)"></span>
</span>
</div>
<div class="w-full bg-[#101827] rounded-full h-2 overflow-hidden border border-[#1E293B]">
<div class="h-full rounded-full transition-all duration-300 ease-out"
:class="{
'bg-gradient-to-r from-[#38BDF8] to-[#8B5CF6]': $store.chat.getContextUsagePercent() < 80,
'bg-gradient-to-r from-yellow-500 to-orange-500': $store.chat.getContextUsagePercent() >= 80 && $store.chat.getContextUsagePercent() < 95,
'bg-gradient-to-r from-red-500 to-red-600': $store.chat.getContextUsagePercent() >= 95
}"
:style="'width: ' + Math.min(100, $store.chat.getContextUsagePercent()) + '%'">
</div>
</div>
<div class="flex justify-between text-xs">
<span class="text-[#94A3B8]">
Remaining:
<span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.getRemainingTokens())"></span>
</span>
<span class="text-[#94A3B8]">
<span x-text="Math.round($store.chat.getContextUsagePercent())"></span>%
</span>
</div>
<div x-show="$store.chat.getContextUsagePercent() >= 80" class="mt-2 p-2 bg-yellow-500/10 border border-yellow-500/30 rounded text-yellow-300 text-xs">
<i class="fas fa-exclamation-triangle mr-1"></i>
<span x-show="$store.chat.getContextUsagePercent() >= 95">Context window nearly full!</span>
<span x-show="$store.chat.getContextUsagePercent() >= 80 && $store.chat.getContextUsagePercent() < 95">Approaching context limit</span>
</div>
</div>
</div>
</template>
<!-- Settings tab -->
<div x-show="activeTab === 'settings'" x-data="{ showPromptForm: false }" class="space-y-3">
{{ if $model }}
{{ $galleryConfig:= index $allGalleryConfigs $model}}
{{ if $galleryConfig }}
@@ -167,97 +368,120 @@ SOFTWARE.
{{ end }}
{{ if and $modelConfig (or (ne $modelConfig.MCP.Servers "") (ne $modelConfig.MCP.Stdio "")) }}
<!-- MCP Toggle -->
<div class="flex items-center justify-between px-3 py-2 text-sm rounded text-white bg-gray-700">
<span><i class="fa-solid fa-plug mr-2"></i> Agentic MCP Mode</span>
<div class="flex items-center justify-between px-3 py-2 text-sm rounded text-[#E5E7EB] bg-[#1E293B] border border-[#38BDF8]/20">
<span><i class="fa-solid fa-plug mr-2 text-[#38BDF8]"></i> Agentic MCP Mode</span>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" id="mcp-toggle" class="sr-only peer" x-model="$store.chat.mcpMode">
<div class="w-11 h-6 bg-gray-600 peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-blue-300 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-blue-600"></div>
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#38BDF8]/30 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-[#1E293B] after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#38BDF8]"></div>
</label>
</div>
<!-- MCP Mode Notification -->
<div x-show="$store.chat.mcpMode" class="p-3 bg-blue-900/20 border border-blue-700/50 rounded text-blue-100 text-xs">
<div x-show="$store.chat.mcpMode" class="p-3 bg-[#38BDF8]/10 border border-[#38BDF8]/30 rounded text-[#94A3B8] text-xs">
<div class="flex items-start space-x-2">
<i class="fa-solid fa-info-circle text-blue-400 mt-0.5"></i>
<i class="fa-solid fa-info-circle text-[#38BDF8] mt-0.5"></i>
<div>
<p class="font-medium text-blue-200 mb-1">Non-streaming Mode Active</p>
<p class="text-blue-300">Responses will be processed in full before display. This may take significantly longer (up to 5 minutes), especially on CPU-only systems.</p>
<p class="font-medium text-[#E5E7EB] mb-1">Non-streaming Mode Active</p>
<p class="text-[#94A3B8]">Responses will be processed in full before display. This may take significantly longer (up to 5 minutes), especially on CPU-only systems.</p>
</div>
</div>
</div>
{{ end }}
{{ end }}
{{ end }}
<button
@click="showPromptForm = !showPromptForm"
class="w-full flex items-center justify-between px-3 py-2 text-sm rounded text-white bg-gray-700 hover:bg-gray-600 transition-colors"
<button
@click="showPromptForm = !showPromptForm"
class="w-full flex items-center justify-between px-3 py-2 text-sm rounded text-[#E5E7EB] bg-[#1E293B] hover:bg-[#1E293B]/80 border border-[#38BDF8]/20 hover:border-[#38BDF8]/40 transition-colors glow-on-hover"
>
<span><i class="fa-solid fa-message mr-2"></i> System Prompt</span>
<span><i class="fa-solid fa-message mr-2 text-[#38BDF8]"></i> System Prompt</span>
<i :class="showPromptForm ? 'fa-chevron-up' : 'fa-chevron-down'" class="fa-solid"></i>
</button>
<div x-show="showPromptForm" class="p-3 bg-gray-700 rounded">
<form id="system_prompt" class="flex flex-col space-y-2">
<div x-show="showPromptForm" x-data="{
showToast: false,
previousPrompt: $store.chat.systemPrompt,
isUpdated() {
if (this.previousPrompt !== $store.chat.systemPrompt) {
this.showToast = true;
this.previousPrompt = $store.chat.systemPrompt;
setTimeout(() => {this.showToast = false;}, 2000);
}
}
}" class="p-3 bg-[#1E293B] border border-[#38BDF8]/20 rounded-lg">
<form id="system_prompt" @submit.prevent="isUpdated" class="flex flex-col space-y-2">
<textarea
type="text"
id="systemPrompt"
name="systemPrompt"
class="bg-gray-800 text-white border border-gray-600 focus:border-blue-500 focus:ring focus:ring-blue-500 focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none min-h-24"
class="bg-[#101827] text-[#E5E7EB] border border-[#1E293B] focus:border-[#38BDF8] focus:ring focus:ring-[#38BDF8] focus:ring-opacity-50 rounded-md shadow-sm p-2 appearance-none min-h-24 placeholder-[#94A3B8]"
placeholder="System prompt"
x-model.lazy="$store.chat.systemPrompt"
></textarea>
<div
x-show="showToast"
x-transition
class="mb-2 text-green-400 px-4 py-2 text-sm text-center bg-green-500/10 border border-green-500/30 rounded"
>
System prompt updated!
</div>
<button
type="submit"
class="px-3 py-2 text-sm rounded text-white bg-blue-600 hover:bg-blue-700 transition-colors"
class="px-3 py-2 text-sm rounded text-[#101827] bg-[#38BDF8] hover:bg-[#38BDF8]/90 transition-colors font-medium"
>
Save System Prompt
</button>
</form>
</div>
</div>
</div>
</div>
</div>
<!-- Main chat container (shifts with sidebar) -->
<div
<div
class="flex-1 flex flex-col transition-all duration-300 ease-in-out"
:class="sidebarOpen ? 'ml-64' : 'ml-0'">
<!-- Chat header with toggle button -->
<div class="border-b border-gray-700 p-4 flex items-center">
<!-- Sidebar toggle button moved to be the first element in the header and with clear styling -->
<button
@click="sidebarOpen = !sidebarOpen"
class="mr-4 text-gray-300 hover:text-white focus:outline-none bg-gray-800 hover:bg-gray-700 p-2 rounded"
style="min-width: 36px;"
title="Toggle settings">
<i class="fa-solid" :class="sidebarOpen ? 'fa-times' : 'fa-bars'"></i>
</button>
<div class="border-b border-[#1E293B] p-4 flex items-center justify-between">
<div class="flex items-center">
<i class="fa-solid fa-comments mr-2"></i>
{{ if $model }}
{{ $galleryConfig:= index $allGalleryConfigs $model}}
{{ if $galleryConfig }}
{{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg w-8 h-8 mr-2">{{end}}
{{ end }}
{{ end }}
<h1 class="text-lg font-semibold">
Chat {{ if .Model }} with {{.Model}} {{ end }}
</h1>
<!-- Sidebar toggle button moved to be the first element in the header and with clear styling -->
<button
@click="sidebarOpen = !sidebarOpen"
class="mr-4 text-[#94A3B8] hover:text-[#E5E7EB] focus:outline-none bg-[#1E293B] hover:bg-[#1E293B]/80 p-2 rounded transition-colors"
style="min-width: 36px;"
title="Toggle settings">
<i class="fa-solid" :class="sidebarOpen ? 'fa-times' : 'fa-bars'"></i>
</button>
<div class="flex items-center">
<i class="fa-solid fa-comments mr-2 text-[#38BDF8]"></i>
{{ if $model }}
{{ $galleryConfig:= index $allGalleryConfigs $model}}
{{ if $galleryConfig }}
{{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg w-8 h-8 mr-2">{{end}}
{{ end }}
{{ end }}
<h1 class="text-lg font-semibold text-[#E5E7EB]">
Chat {{ if .Model }} with {{.Model}} {{ end }}
</h1>
<!-- Loading indicator next to model name -->
<div id="header-loading-indicator" class="ml-3 text-[#38BDF8]" style="display: none;">
<i class="fas fa-spinner fa-spin text-sm"></i>
</div>
</div>
</div>
</div>
<!-- Chat messages area -->
<div class="flex-1 p-4 overflow-auto" id="chat" x-data="{history: $store.chat.history}">
<p id="usage" x-show="history.length === 0" class="text-gray-300">
<p id="usage" x-show="history.length === 0" class="text-[#94A3B8]">
Start chatting with the AI by typing a prompt in the input field below and pressing Enter.<br>
<ul class="list-disc list-inside">
<li>For models that support images, you can upload an image by clicking the <i class="fa-solid fa-image"></i> icon.</li>
<li>For models that support audio, you can upload an audio file by clicking the <i class="fa-solid fa-microphone"></i> icon.</li>
<li>To send a text, markdown or PDF file, click the <i class="fa-solid fa-file"></i> icon.</li>
<ul class="list-disc list-inside mt-2 space-y-1">
<li>For models that support images, you can upload an image by clicking the <i class="fa-solid fa-image text-[#38BDF8]"></i> icon.</li>
<li>For models that support audio, you can upload an audio file by clicking the <i class="fa-solid fa-microphone text-[#38BDF8]"></i> icon.</li>
<li>To send a text, markdown or PDF file, click the <i class="fa-solid fa-file text-[#38BDF8]"></i> icon.</li>
</ul>
</p>
<div id="messages" class="max-w-3xl mx-auto">
@@ -268,8 +492,8 @@ SOFTWARE.
<template x-if="message.role === 'user'">
<div class="flex items-center space-x-2">
<div class="flex flex-col flex-1 items-end">
<span class="text-xs font-semibold text-gray-400">You</span>
<div class="p-2 flex-1 rounded bg-gray-700 text-white" x-html="message.html"></div>
<span class="text-xs font-semibold text-[#94A3B8] mb-1">You</span>
<div class="p-3 flex-1 rounded-lg bg-gradient-to-br from-[#1E293B] to-[#101827] text-[#E5E7EB] border border-[#38BDF8]/20 shadow-lg" x-html="message.html"></div>
<template x-if="message.image && message.image.length > 0">
<div class="mt-2 space-y-2">
<template x-for="(img, index) in message.image" :key="index">
@@ -293,12 +517,12 @@ SOFTWARE.
<template x-if="message.role === 'thinking'">
<div class="flex items-center space-x-2 w-full">
<div class="flex flex-col flex-1">
<div class="p-2 flex-1 rounded bg-blue-900/50 text-blue-100 border border-blue-700/50">
<div class="flex items-center space-x-2">
<i class="fa-solid fa-brain text-blue-400"></i>
<span class="text-xs font-semibold text-blue-300">Thinking</span>
<div class="p-3 flex-1 rounded-lg bg-[#38BDF8]/10 text-[#94A3B8] border border-[#38BDF8]/30">
<div class="flex items-center space-x-2 mb-2">
<i class="fa-solid fa-brain text-[#38BDF8]"></i>
<span class="text-xs font-semibold text-[#38BDF8]">Thinking</span>
</div>
<div class="mt-1" x-html="message.html"></div>
<div class="mt-1 text-[#E5E7EB]" x-html="message.html"></div>
</div>
</div>
</div>
@@ -306,13 +530,13 @@ SOFTWARE.
<template x-if="message.role != 'user' && message.role != 'thinking'">
<div class="flex items-center space-x-2">
{{ if $galleryConfig }}
{{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg mt-2 max-w-8 max-h-8">{{end}}
{{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg mt-2 max-w-8 max-h-8 border border-[#38BDF8]/20">{{end}}
{{ end }}
<div class="flex flex-col flex-1">
<span class="text-xs font-semibold text-gray-400">{{if .Model}}{{.Model}}{{else}}Assistant{{end}}</span>
<div class="flex-1 text-white flex items-center space-x-2">
<div x-html="message.html"></div>
<button @click="copyToClipboard(message.html)" title="Copy to clipboard" class="text-gray-400 hover:text-gray-100">
<span class="text-xs font-semibold text-[#94A3B8] mb-1">{{if .Model}}{{.Model}}{{else}}Assistant{{end}}</span>
<div class="flex-1 text-[#E5E7EB] flex items-center space-x-2">
<div class="p-3 rounded-lg bg-gradient-to-br from-[#1E293B] to-[#101827] border border-[#8B5CF6]/20 shadow-lg" x-html="message.html"></div>
<button @click="copyToClipboard(message.html)" title="Copy to clipboard" class="text-[#94A3B8] hover:text-[#38BDF8] transition-colors p-1">
<i class="fa-solid fa-copy"></i>
</button>
</div>
@@ -339,7 +563,7 @@ SOFTWARE.
{{ else }}
<i
class="fa-solid h-8 w-8"
:class="message.role === 'user' ? 'fa-user' : 'fa-robot'"
:class="message.role === 'user' ? 'fa-user text-[#38BDF8]' : 'fa-robot text-[#8B5CF6]'"
></i>
{{ end }}
</div>
@@ -349,65 +573,119 @@ SOFTWARE.
<!-- Chat Input -->
<div class="p-4 border-t border-gray-700" x-data="{ inputValue: '', shiftPressed: false, fileName: '', isLoading: false }">
<div class="p-4 border-t border-[#1E293B]" x-data="{ inputValue: '', shiftPressed: false, fileName: '' }">
<form id="prompt" action="chat/{{.Model}}" method="get" @submit.prevent="submitPrompt" class="max-w-3xl mx-auto">
<div class="relative w-full bg-gray-800 rounded-xl shadow-md">
<!-- Token Usage and Context Window - Compact above input -->
<div class="mb-3 flex items-center justify-between gap-4 text-xs">
<!-- Token Usage -->
<div class="flex items-center gap-3 text-[#94A3B8]">
<div class="flex items-center gap-1">
<i class="fas fa-chart-line text-[#38BDF8]"></i>
<span>Prompt:</span>
<span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.promptTokens)"></span>
</div>
<div class="flex items-center gap-1">
<span>Completion:</span>
<span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.completionTokens)"></span>
</div>
<div class="flex items-center gap-1 border-l border-[#1E293B] pl-3">
<span class="text-[#38BDF8] font-semibold">Total:</span>
<span class="text-[#E5E7EB] font-bold" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.totalTokens)"></span>
</div>
<!-- Tokens per second display -->
<div id="tokens-per-second-container" class="flex items-center gap-1 border-l border-[#1E293B] pl-3">
<i class="fas fa-tachometer-alt text-[#38BDF8]"></i>
<span id="tokens-per-second" class="text-[#E5E7EB] font-medium">-</span>
</div>
</div>
<!-- Context Window -->
<template x-if="$store.chat.contextSize && $store.chat.contextSize > 0">
<div class="flex items-center gap-2 text-[#94A3B8]">
<i class="fas fa-database text-[#38BDF8]"></i>
<span>
<span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.tokenUsage.totalTokens)"></span>
/
<span class="text-[#E5E7EB] font-medium" x-text="new Intl.NumberFormat().format($store.chat.contextSize)"></span>
</span>
<div class="w-16 bg-[#101827] rounded-full h-1.5 overflow-hidden border border-[#1E293B]">
<div class="h-full rounded-full transition-all duration-300 ease-out"
:class="{
'bg-gradient-to-r from-[#38BDF8] to-[#8B5CF6]': $store.chat.getContextUsagePercent() < 80,
'bg-gradient-to-r from-yellow-500 to-orange-500': $store.chat.getContextUsagePercent() >= 80 && $store.chat.getContextUsagePercent() < 95,
'bg-gradient-to-r from-red-500 to-red-600': $store.chat.getContextUsagePercent() >= 95
}"
:style="'width: ' + Math.min(100, $store.chat.getContextUsagePercent()) + '%'">
</div>
</div>
<span class="text-[#94A3B8]" x-text="Math.round($store.chat.getContextUsagePercent()) + '%'"></span>
<span x-show="$store.chat.getContextUsagePercent() >= 80" class="text-yellow-400">
<i class="fas fa-exclamation-triangle"></i>
</span>
</div>
</template>
</div>
<div class="relative w-full bg-[#1E293B] border border-[#38BDF8]/20 rounded-xl shadow-lg">
<textarea
id="input"
name="input"
x-model="inputValue"
placeholder="Send a message..."
class="p-4 pr-16 w-full bg-gray-800 text-gray-100 placeholder-gray-400 focus:outline-none resize-none border-0 rounded-xl transition-colors duration-200"
class="p-3 pr-16 w-full bg-[#1E293B] text-[#E5E7EB] placeholder-[#94A3B8] focus:outline-none resize-none border-0 rounded-xl transition-colors duration-200 focus:ring-2 focus:ring-[#38BDF8]/50"
required
@keydown.shift="shiftPressed = true"
@keyup.shift="shiftPressed = false"
@keydown.enter="if (!shiftPressed) { submitPrompt($event); }"
rows="3"
style="box-shadow: 0 0 0 1px rgba(75, 85, 99, 0.4) inset;"
@keydown.enter.prevent="if (!shiftPressed) { submitPrompt($event); }"
rows="2"
></textarea>
<span x-text="fileName" id="fileName" class="absolute right-16 top-4 text-gray-400 text-sm mr-2"></span>
<span x-text="fileName" id="fileName" class="absolute right-16 top-3 text-[#94A3B8] text-xs mr-2"></span>
<button
type="button"
onclick="document.getElementById('input_image').click()"
class="fa-solid fa-image text-gray-400 absolute right-12 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
class="fa-solid fa-image text-[#94A3B8] absolute right-12 top-3 text-base p-1.5 hover:text-[#38BDF8] transition-colors duration-200"
title="Attach images"
></button>
<button
type="button"
onclick="document.getElementById('input_audio').click()"
class="fa-solid fa-microphone text-gray-400 absolute right-20 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
class="fa-solid fa-microphone text-[#94A3B8] absolute right-20 top-3 text-base p-1.5 hover:text-[#38BDF8] transition-colors duration-200"
title="Attach an audio file"
></button>
<button
type="button"
onclick="document.getElementById('input_file').click()"
class="fa-solid fa-file text-gray-400 absolute right-28 top-4 text-lg p-2 hover:text-blue-400 transition-colors duration-200"
class="fa-solid fa-file text-[#94A3B8] absolute right-28 top-3 text-base p-1.5 hover:text-[#38BDF8] transition-colors duration-200"
title="Upload text, markdown or PDF file"
></button>
<!-- Send button and loader in the same position -->
<div class="absolute right-3 top-4">
<!-- Loader (hidden by default) -->
<div id="loader" class="text-lg p-2" style="display: none;">
<svg class="animate-spin h-5 w-5 text-blue-500" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
</svg>
</div>
<!-- Send button and stop button in the same position -->
<div class="absolute right-3 top-3 flex items-center">
<!-- Stop button (hidden by default, shown when request is in progress) -->
<button
id="stop-button"
type="button"
onclick="stopRequest()"
class="text-lg p-2 text-red-400 hover:text-red-500 transition-colors duration-200"
style="display: none;"
title="Stop request"
>
<i class="fa-solid fa-stop"></i>
</button>
<!-- Send button -->
<button
id="send-button"
type="submit"
class="text-lg p-2 text-gray-400 hover:text-blue-400 transition-colors duration-200"
title="Send message"
class="text-lg p-2 text-[#94A3B8] hover:text-[#38BDF8] transition-colors duration-200"
title="Send message (Enter)"
>
<i class="fa-solid fa-paper-plane"></i>
</button>
</div>
</div>
</form>
<input id="chat-model" type="hidden" value="{{.Model}}">
<input id="chat-model" type="hidden" value="{{.Model}}" {{ if .ContextSize }}data-context-size="{{.ContextSize}}"{{ end }}>
<input
id="input_image"
type="file"
@@ -437,7 +715,7 @@ SOFTWARE.
</div>
</div>
</div>
<!-- Modal moved outside of sidebar to appear in center of page -->
{{ if $model }}
{{ $galleryConfig:= index $allGalleryConfigs $model}}
@@ -455,22 +733,22 @@ SOFTWARE.
<span class="sr-only">Close modal</span>
</button>
</div>
<!-- Body -->
<div class="p-4 md:p-5 space-y-4">
<div class="flex justify-center items-center">
{{ if $galleryConfig.Icon }}<img class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded" src="{{$galleryConfig.Icon}}" loading="lazy"/>{{end}}
</div>
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">{{ $galleryConfig.Description }}</p>
<div id="model-info-description" class="text-base leading-relaxed text-gray-500 dark:text-gray-400 break-words max-w-full">{{ $galleryConfig.Description }}</div>
<hr>
<p class="text-sm font-semibold text-gray-900 dark:text-white">Links</p>
<ul>
{{range $galleryConfig.URLs}}
<li><a href="{{ . }}" target="_blank">{{ . }}</a></li>
{{end}}
</ul>
</ul>
</div>
<!-- Footer -->
<div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
<button data-modal-hide="model-info-modal" class="py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700">
@@ -483,59 +761,9 @@ SOFTWARE.
{{ end }}
{{ end }}
<!-- Alpine store initialization -->
<!-- Alpine store initialization and utilities -->
<script>
document.addEventListener("alpine:init", () => {
Alpine.store("chat", {
history: [],
languages: [undefined],
systemPrompt: "",
mcpMode: false,
clear() {
this.history.length = 0;
},
add(role, content, image, audio) {
const N = this.history.length - 1;
if (this.history.length && this.history[N].role === role) {
this.history[N].content += content;
this.history[N].html = DOMPurify.sanitize(
marked.parse(this.history[N].content)
);
} else {
let c = "";
const lines = content.split("\n");
lines.forEach((line) => {
c += DOMPurify.sanitize(marked.parse(line));
});
this.history.push({ role, content, html: c, image, audio });
}
document.getElementById('messages').scrollIntoView(false);
const parser = new DOMParser();
const html = parser.parseFromString(
this.history[this.history.length - 1].html,
"text/html"
);
const code = html.querySelectorAll("pre code");
if (!code.length) return;
code.forEach((el) => {
const language = el.className.split("language-")[1];
if (this.languages.includes(language)) return;
const script = document.createElement("script");
script.src = `https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/languages/${language}.min.js`;
document.head.appendChild(script);
this.languages.push(language);
});
},
messages() {
return this.history.map((message) => ({
role: message.role,
content: message.content,
image: message.image,
audio: message.audio,
}));
},
});
window.copyToClipboard = (content) => {
const tempElement = document.createElement('div');
tempElement.innerHTML = content;
@@ -548,6 +776,134 @@ SOFTWARE.
});
};
});
// Context size is now initialized in the Alpine store initialization above
// Process markdown in model info modal when it opens
function initMarkdownProcessing() {
// Wait for marked and DOMPurify to be available
if (typeof marked === 'undefined' || typeof DOMPurify === 'undefined') {
setTimeout(initMarkdownProcessing, 100);
return;
}
const modalElement = document.getElementById('model-info-modal');
const descriptionElement = document.getElementById('model-info-description');
if (!modalElement || !descriptionElement) {
return;
}
// Store original text in data attribute if not already stored
let originalText = descriptionElement.dataset.originalText;
if (!originalText) {
originalText = descriptionElement.textContent || descriptionElement.innerText;
descriptionElement.dataset.originalText = originalText;
}
// Process markdown function
const processMarkdown = () => {
if (!descriptionElement || !originalText) return;
try {
// Check if already processed (has HTML tags that look like markdown output)
const currentContent = descriptionElement.innerHTML.trim();
if (currentContent.startsWith('<') && (currentContent.includes('<p>') || currentContent.includes('<h') || currentContent.includes('<ul>') || currentContent.includes('<ol>'))) {
return; // Already processed
}
// Use stored original text
const textToProcess = descriptionElement.dataset.originalText || originalText;
if (textToProcess && textToProcess.trim()) {
const html = marked.parse(textToProcess);
descriptionElement.innerHTML = DOMPurify.sanitize(html);
}
} catch (error) {
console.error('Error rendering markdown:', error);
}
};
// Process immediately if modal is already visible
if (!modalElement.classList.contains('hidden')) {
processMarkdown();
}
// Listen for modal show events - check both aria-hidden and class changes
const observer = new MutationObserver((mutations) => {
mutations.forEach((mutation) => {
if (mutation.type === 'attributes') {
const isHidden = modalElement.classList.contains('hidden') ||
modalElement.getAttribute('aria-hidden') === 'true';
if (!isHidden) {
// Modal is now visible, process markdown
setTimeout(processMarkdown, 150);
}
}
});
});
observer.observe(modalElement, {
attributes: true,
attributeFilter: ['aria-hidden', 'class'],
childList: false,
subtree: false
});
// Also listen for click events on modal toggle buttons
document.querySelectorAll('[data-modal-toggle="model-info-modal"]').forEach(button => {
button.addEventListener('click', () => {
setTimeout(processMarkdown, 300);
});
});
// Process on initial load if libraries are ready
setTimeout(processMarkdown, 200);
}
// Start initialization
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', initMarkdownProcessing);
} else {
initMarkdownProcessing();
}
</script>
<style>
/* Markdown content overflow handling */
#model-info-description {
word-wrap: break-word;
overflow-wrap: anywhere;
max-width: 100%;
}
#model-info-description pre {
overflow-x: auto;
max-width: 100%;
white-space: pre-wrap;
word-wrap: break-word;
}
#model-info-description code {
word-wrap: break-word;
overflow-wrap: break-word;
}
#model-info-description pre code {
white-space: pre;
overflow-x: auto;
display: block;
}
#model-info-description table {
max-width: 100%;
overflow-x: auto;
display: block;
}
#model-info-description img {
max-width: 100%;
height: auto;
}
</style>
</body>
</html>
</html>

View File

@@ -3,9 +3,10 @@
{{template "views/partials/head" .}}
<body class="bg-[#101827] text-[#E5E7EB]">
<div class="flex flex-col min-h-screen">
<div class="flex flex-col min-h-screen" x-data="importModel()" x-init="init()">
{{template "views/partials/navbar" .}}
{{template "views/partials/inprogress" .}}
<div class="container mx-auto px-4 py-8 flex-grow">
<!-- Hero Header -->
@@ -24,19 +25,44 @@
{{if .ModelName}}Edit Model: {{.ModelName}}{{else}}Import New Model{{end}}
</span>
</h1>
<p class="text-lg text-gray-300 font-light">Configure your model settings using YAML</p>
<p class="text-lg text-gray-300 font-light" x-text="isAdvancedMode ? 'Configure your model settings using YAML' : 'Import a model from URI with preferences'"></p>
</div>
<div class="flex gap-3">
<button id="validateBtn" class="group relative inline-flex items-center bg-gradient-to-r from-blue-600 to-blue-700 hover:from-blue-700 hover:to-blue-800 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-blue-500/25">
<i class="fas fa-check mr-2 group-hover:animate-pulse"></i>
<span>Validate</span>
<div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
</button>
<button id="saveBtn" class="group relative inline-flex items-center bg-gradient-to-r from-green-600 to-emerald-600 hover:from-green-700 hover:to-emerald-700 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-green-500/25">
<i class="fas fa-save mr-2 group-hover:animate-pulse"></i>
<span>{{if .ModelName}}Update{{else}}Create{{end}}</span>
<div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
</button>
<!-- Mode Toggle (only show when not in edit mode) -->
<template x-if="!isEditMode">
<button @click="toggleMode()"
class="group relative inline-flex items-center bg-gradient-to-r from-gray-600 to-gray-700 hover:from-gray-700 hover:to-gray-800 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl">
<i class="fas group-hover:animate-pulse" :class="isAdvancedMode ? 'fa-magic mr-2' : 'fa-code mr-2'"></i>
<span x-text="isAdvancedMode ? 'Simple Mode' : 'Advanced Mode'"></span>
<div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
</button>
</template>
<!-- Advanced Mode Buttons -->
<template x-if="isAdvancedMode">
<div class="flex gap-3">
<button id="validateBtn" class="group relative inline-flex items-center bg-gradient-to-r from-blue-600 to-blue-700 hover:from-blue-700 hover:to-blue-800 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-blue-500/25">
<i class="fas fa-check mr-2 group-hover:animate-pulse"></i>
<span>Validate</span>
<div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
</button>
<button id="saveBtn" class="group relative inline-flex items-center bg-gradient-to-r from-green-600 to-emerald-600 hover:from-green-700 hover:to-emerald-700 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-green-500/25">
<i class="fas fa-save mr-2 group-hover:animate-pulse"></i>
<span>{{if .ModelName}}Update{{else}}Create{{end}}</span>
<div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
</button>
</div>
</template>
<!-- Simple Mode Button -->
<template x-if="!isAdvancedMode && !isEditMode">
<button @click="submitImport()"
:disabled="isSubmitting || !importUri.trim()"
:class="(isSubmitting || !importUri.trim()) ? 'opacity-50 cursor-not-allowed' : ''"
class="group relative inline-flex items-center bg-gradient-to-r from-green-600 to-emerald-600 hover:from-green-700 hover:to-emerald-700 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-green-500/25">
<i class="fas group-hover:animate-pulse" :class="isSubmitting ? 'fa-spinner fa-spin mr-2' : 'fa-upload mr-2'"></i>
<span x-text="isSubmitting ? 'Importing...' : 'Import Model'"></span>
<div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
</button>
</template>
</div>
</div>
</div>
@@ -45,8 +71,187 @@
<!-- Alert Messages -->
<div id="alertContainer" class="mb-6"></div>
<!-- YAML Editor Panel -->
<div class="relative bg-gradient-to-br from-gray-800/90 to-gray-900/90 border border-gray-700/50 rounded-2xl overflow-hidden shadow-xl backdrop-blur-sm h-[calc(100vh-250px)]">
<!-- Simple Import Mode -->
<div x-show="!isAdvancedMode && !isEditMode"
x-transition:enter="transition ease-out duration-300"
x-transition:enter-start="opacity-0 transform translate-y-4"
x-transition:enter-end="opacity-100 transform translate-y-0"
class="relative bg-gradient-to-br from-gray-800/90 to-gray-900/90 border border-gray-700/50 rounded-2xl overflow-hidden shadow-xl backdrop-blur-sm p-8">
<div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-green-500/5 to-emerald-500/5"></div>
<div class="relative space-y-6">
<h2 class="text-2xl font-semibold text-white flex items-center gap-3 mb-6">
<div class="w-10 h-10 rounded-lg bg-green-500/20 flex items-center justify-center">
<i class="fas fa-link text-green-400"></i>
</div>
Import from URI
</h2>
<!-- URI Input -->
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-link mr-2"></i>Model URI
</label>
<input
x-model="importUri"
type="text"
placeholder="https://example.com/model.gguf or file:///path/to/model.gguf"
class="w-full px-4 py-3 bg-gray-900/90 border border-gray-700/70 rounded-xl text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-2 text-xs text-gray-400">
Enter the URI or path to the model file you want to import
</p>
</div>
<!-- Preferences Section -->
<div>
<div class="flex items-center justify-between mb-4">
<label class="block text-sm font-medium text-gray-300">
<i class="fas fa-cog mr-2"></i>Preferences (Optional)
</label>
</div>
<!-- Common Preferences -->
<div class="space-y-4 mb-6 p-4 bg-gray-900/50 rounded-xl border border-gray-700/50">
<h3 class="text-sm font-semibold text-gray-300 mb-3 flex items-center">
<i class="fas fa-star mr-2 text-yellow-400"></i>Common Preferences
</h3>
<!-- Backend Selection -->
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-server mr-2"></i>Backend
</label>
<select
x-model="commonPreferences.backend"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<option value="">Auto-detect (based on URI)</option>
<option value="llama-cpp">llama-cpp</option>
<option value="mlx">mlx</option>
<option value="mlx-vlm">mlx-vlm</option>
</select>
<p class="mt-1 text-xs text-gray-400">
Force a specific backend. Leave empty to auto-detect from URI.
</p>
</div>
<!-- Model Name -->
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-tag mr-2"></i>Model Name
</label>
<input
x-model="commonPreferences.name"
type="text"
placeholder="Leave empty to use filename"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-1 text-xs text-gray-400">
Custom name for the model. If empty, the filename will be used.
</p>
</div>
<!-- Description -->
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-align-left mr-2"></i>Description
</label>
<textarea
x-model="commonPreferences.description"
rows="3"
placeholder="Leave empty to use default description"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all resize-none"
:disabled="isSubmitting"></textarea>
<p class="mt-1 text-xs text-gray-400">
Custom description for the model. If empty, a default description will be generated.
</p>
</div>
<!-- Quantizations -->
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-layer-group mr-2"></i>Quantizations
</label>
<input
x-model="commonPreferences.quantizations"
type="text"
placeholder="q4_k_m,q4_k_s,q3_k_m (comma-separated)"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-1 text-xs text-gray-400">
Preferred quantizations (comma-separated). Examples: q4_k_m, q4_k_s, q3_k_m, q2_k. Leave empty to use default (q4_k_m).
</p>
</div>
<!-- MMProj Quantizations -->
<div>
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-image mr-2"></i>MMProj Quantizations
</label>
<input
x-model="commonPreferences.mmproj_quantizations"
type="text"
placeholder="fp16,fp32 (comma-separated)"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-1 text-xs text-gray-400">
Preferred MMProj quantizations (comma-separated). Examples: fp16, fp32. Leave empty to use default (fp16).
</p>
</div>
</div>
<!-- Custom Preferences -->
<div class="space-y-3">
<div class="flex items-center justify-between mb-3">
<label class="block text-sm font-medium text-gray-300">
<i class="fas fa-sliders-h mr-2"></i>Custom Preferences
</label>
<button @click="addPreference()"
:disabled="isSubmitting"
class="text-sm px-3 py-1.5 rounded-lg bg-green-600/20 hover:bg-green-600/30 text-green-300 border border-green-500/30 transition-all">
<i class="fas fa-plus mr-1"></i>Add Custom
</button>
</div>
<div class="space-y-3" x-show="preferences.length > 0">
<template x-for="(pref, index) in preferences" :key="index">
<div class="flex gap-3 items-center">
<input
x-model="pref.key"
type="text"
placeholder="Key"
class="flex-1 px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<span class="text-gray-400">:</span>
<input
x-model="pref.value"
type="text"
placeholder="Value"
class="flex-1 px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<button @click="removePreference(index)"
:disabled="isSubmitting"
class="px-3 py-2 rounded-lg bg-red-600/20 hover:bg-red-600/30 text-red-300 border border-red-500/30 transition-all">
<i class="fas fa-trash"></i>
</button>
</div>
</template>
</div>
<p class="mt-2 text-xs text-gray-400">
Add custom key-value pairs for advanced configuration
</p>
</div>
</div>
</div>
</div>
<!-- Advanced YAML Editor Panel -->
<div x-show="isAdvancedMode || isEditMode"
x-transition:enter="transition ease-out duration-300"
x-transition:enter-start="opacity-0 transform translate-y-4"
x-transition:enter-end="opacity-100 transform translate-y-0"
class="relative bg-gradient-to-br from-gray-800/90 to-gray-900/90 border border-gray-700/50 rounded-2xl overflow-hidden shadow-xl backdrop-blur-sm h-[calc(100vh-250px)]">
<div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-fuchsia-500/5 to-purple-500/5"></div>
<div class="relative sticky top-0 bg-gray-800/95 border-b border-gray-700/50 p-6 flex items-center justify-between z-10 backdrop-blur-sm">
@@ -144,22 +349,22 @@
}
/* Enhanced YAML Syntax Highlighting */
.cm-keyword { color: #8b5cf6 !important; font-weight: 600 !important; } /* Purple for YAML keys */
.cm-string { color: #10b981 !important; } /* Emerald for strings */
.cm-number { color: #f59e0b !important; } /* Amber for numbers */
.cm-comment { color: #6b7280 !important; font-style: italic !important; } /* Gray for comments */
.cm-property { color: #ec4899 !important; } /* Pink for properties */
.cm-operator { color: #ef4444 !important; } /* Red for operators */
.cm-variable { color: #06b6d4 !important; } /* Cyan for variables */
.cm-tag { color: #8b5cf6 !important; font-weight: 600 !important; } /* Purple for tags */
.cm-attribute { color: #f59e0b !important; } /* Amber for attributes */
.cm-def { color: #ec4899 !important; font-weight: 600 !important; } /* Pink for definitions */
.cm-bracket { color: #d1d5db !important; } /* Light gray for brackets */
.cm-punctuation { color: #d1d5db !important; } /* Light gray for punctuation */
.cm-quote { color: #10b981 !important; } /* Emerald for quotes */
.cm-meta { color: #6b7280 !important; } /* Gray for meta */
.cm-builtin { color: #f472b6 !important; } /* Pink for builtins */
.cm-atom { color: #f59e0b !important; } /* Amber for atoms like true/false/null */
.cm-keyword { color: #8b5cf6 !important; font-weight: 600 !important; }
.cm-string { color: #10b981 !important; }
.cm-number { color: #f59e0b !important; }
.cm-comment { color: #6b7280 !important; font-style: italic !important; }
.cm-property { color: #ec4899 !important; }
.cm-operator { color: #ef4444 !important; }
.cm-variable { color: #06b6d4 !important; }
.cm-tag { color: #8b5cf6 !important; font-weight: 600 !important; }
.cm-attribute { color: #f59e0b !important; }
.cm-def { color: #ec4899 !important; font-weight: 600 !important; }
.cm-bracket { color: #d1d5db !important; }
.cm-punctuation { color: #d1d5db !important; }
.cm-quote { color: #10b981 !important; }
.cm-meta { color: #6b7280 !important; }
.cm-builtin { color: #f472b6 !important; }
.cm-atom { color: #f59e0b !important; }
/* Enhanced scrollbar styling */
.CodeMirror-scrollbar-filler, .CodeMirror-gutter-filler {
@@ -242,22 +447,221 @@
</style>
<script>
class ModelEditor {
constructor() {
this.modelName = '{{.ModelName}}';
this.isEditMode = !!this.modelName;
this.yamlEditor = null;
function importModel() {
return {
isAdvancedMode: false,
isEditMode: {{if .ModelName}}true{{else}}false{{end}},
importUri: '',
preferences: [],
commonPreferences: {
backend: '',
name: '',
description: '',
quantizations: '',
mmproj_quantizations: ''
},
isSubmitting: false,
currentJobId: null,
jobPollInterval: null,
yamlEditor: null,
modelEditor: null,
this.init();
}
init() {
this.initializeCodeMirror();
this.bindEvents();
}
getDefaultConfig() {
return `# Model Configuration
init() {
// If in edit mode, always show advanced mode
if (this.isEditMode) {
this.isAdvancedMode = true;
}
// Initialize YAML editor if in advanced mode
if (this.isAdvancedMode || this.isEditMode) {
this.$nextTick(() => {
this.initializeCodeMirror();
this.bindAdvancedEvents();
});
}
},
toggleMode() {
this.isAdvancedMode = !this.isAdvancedMode;
if (this.isAdvancedMode) {
this.$nextTick(() => {
this.initializeCodeMirror();
this.bindAdvancedEvents();
});
}
},
addPreference() {
this.preferences.push({ key: '', value: '' });
},
removePreference(index) {
this.preferences.splice(index, 1);
},
async submitImport() {
if (!this.importUri.trim()) {
this.showAlert('error', 'Please enter a model URI');
return;
}
this.isSubmitting = true;
try {
// Build preferences object starting with common preferences
const prefsObj = {};
// Add common preferences (only non-empty values)
if (this.commonPreferences.backend && this.commonPreferences.backend.trim()) {
prefsObj.backend = this.commonPreferences.backend.trim();
}
if (this.commonPreferences.name && this.commonPreferences.name.trim()) {
prefsObj.name = this.commonPreferences.name.trim();
}
if (this.commonPreferences.description && this.commonPreferences.description.trim()) {
prefsObj.description = this.commonPreferences.description.trim();
}
if (this.commonPreferences.quantizations && this.commonPreferences.quantizations.trim()) {
prefsObj.quantizations = this.commonPreferences.quantizations.trim();
}
if (this.commonPreferences.mmproj_quantizations && this.commonPreferences.mmproj_quantizations.trim()) {
prefsObj.mmproj_quantizations = this.commonPreferences.mmproj_quantizations.trim();
}
// Add custom preferences (can override common ones)
this.preferences.forEach(pref => {
if (pref.key && pref.value) {
prefsObj[pref.key.trim()] = pref.value.trim();
}
});
const requestBody = {
uri: this.importUri.trim(),
preferences: Object.keys(prefsObj).length > 0 ? prefsObj : null
};
const response = await fetch('/models/import-uri', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(requestBody)
});
if (!response.ok) {
const error = await response.json().catch(() => ({ error: 'Failed to start import' }));
throw new Error(error.error || 'Failed to start import');
}
const result = await response.json();
if (result.uuid) {
this.currentJobId = result.uuid;
this.showAlert('success', 'Import started! Tracking progress...');
this.startJobPolling();
} else if (result.ID) {
// Fallback for different response format
this.currentJobId = result.ID;
this.showAlert('success', 'Import started! Tracking progress...');
this.startJobPolling();
} else {
throw new Error('No job ID returned from server');
}
} catch (error) {
this.showAlert('error', 'Failed to start import: ' + error.message);
this.isSubmitting = false;
}
},
startJobPolling() {
if (this.jobPollInterval) {
clearInterval(this.jobPollInterval);
}
this.jobPollInterval = setInterval(async () => {
if (!this.currentJobId) {
clearInterval(this.jobPollInterval);
return;
}
try {
const response = await fetch(`/models/jobs/${this.currentJobId}`);
if (!response.ok) {
return;
}
const jobData = await response.json();
if (jobData.completed) {
clearInterval(this.jobPollInterval);
this.isSubmitting = false;
this.currentJobId = null;
this.showAlert('success', 'Model imported successfully! Refreshing page...');
// Refresh the page after a short delay
setTimeout(() => {
window.location.reload();
}, 2000);
} else if (jobData.error) {
clearInterval(this.jobPollInterval);
this.isSubmitting = false;
this.currentJobId = null;
this.showAlert('error', 'Import failed: ' + jobData.error);
}
} catch (error) {
console.error('Error polling job status:', error);
}
}, 1000);
},
initializeCodeMirror() {
if (this.yamlEditor) {
return; // Already initialized
}
const initialValue = {{if .ConfigYAML}}`{{.ConfigYAML}}`{{else}}this.getDefaultConfig(){{end}};
this.yamlEditor = CodeMirror(document.getElementById('yamlCodeMirror'), {
mode: 'yaml',
theme: 'default',
lineNumbers: true,
autoRefresh: true,
indentUnit: 2,
tabSize: 2,
indentWithTabs: false,
lineWrapping: true,
styleActiveLine: true,
matchBrackets: true,
autoCloseBrackets: true,
value: initialValue
});
},
bindAdvancedEvents() {
if (!this.yamlEditor) return;
// Button events
const saveBtn = document.getElementById('saveBtn');
const validateBtn = document.getElementById('validateBtn');
const formatYamlBtn = document.getElementById('formatYamlBtn');
const copyYamlBtn = document.getElementById('copyYamlBtn');
if (saveBtn) {
saveBtn.addEventListener('click', () => this.saveConfig());
}
if (validateBtn) {
validateBtn.addEventListener('click', () => this.validateConfig());
}
if (formatYamlBtn) {
formatYamlBtn.addEventListener('click', () => this.formatYaml());
}
if (copyYamlBtn) {
copyYamlBtn.addEventListener('click', () => this.copyYaml());
}
},
getDefaultConfig() {
return `# Model Configuration
name: my-model
backend: llama-cpp
parameters:
@@ -286,186 +690,150 @@ parameters:
# - chat
# - completion
`;
}
initializeCodeMirror() {
const initialValue = {{if .ConfigYAML}}`{{.ConfigYAML}}`{{else}}this.getDefaultConfig(){{end}};
},
this.yamlEditor = CodeMirror(document.getElementById('yamlCodeMirror'), {
mode: 'yaml',
theme: 'default',
lineNumbers: true,
autoRefresh: true,
indentUnit: 2,
tabSize: 2,
indentWithTabs: false,
lineWrapping: true,
styleActiveLine: true,
matchBrackets: true,
autoCloseBrackets: true,
value: initialValue
});
}
bindEvents() {
// Button events
document.getElementById('saveBtn').addEventListener('click', () => this.saveConfig());
document.getElementById('validateBtn').addEventListener('click', () => this.validateConfig());
document.getElementById('formatYamlBtn').addEventListener('click', () => this.formatYaml());
document.getElementById('copyYamlBtn').addEventListener('click', () => this.copyYaml());
}
validateConfig() {
try {
const yamlContent = this.yamlEditor.getValue();
const config = jsyaml.load(yamlContent);
if (!config || typeof config !== 'object') {
throw new Error('Invalid YAML structure');
}
if (!config.name) {
throw new Error('Model name is required');
}
if (!config.backend) {
throw new Error('Backend is required');
}
if (!config.parameters || !config.parameters.model) {
throw new Error('Model file/path is required in parameters.model');
}
this.showAlert('success', 'Configuration is valid!');
} catch (error) {
this.showAlert('error', 'Validation failed: ' + error.message);
}
}
async saveConfig() {
try {
// Validate before saving
const yamlContent = this.yamlEditor.getValue();
const config = jsyaml.load(yamlContent);
if (!config || typeof config !== 'object') {
throw new Error('Invalid YAML structure');
}
if (!config.name) {
throw new Error('Model name is required');
}
if (!config.backend) {
throw new Error('Backend is required');
}
if (!config.parameters || !config.parameters.model) {
throw new Error('Model file/path is required in parameters.model');
}
const endpoint = this.isEditMode ? `/models/edit/${this.modelName}` : '/models/import';
const response = await fetch(endpoint, {
method: 'POST',
headers: {
'Content-Type': 'application/x-yaml',
},
body: yamlContent
});
const result = await response.json();
if (result.success) {
this.showAlert('success', result.message || (this.isEditMode ? 'Model updated successfully!' : 'Model created successfully!'));
if (!this.isEditMode && config.name) {
setTimeout(() => {
window.location.href = `/models/edit/${config.name}`;
}, 2000);
validateConfig() {
try {
const yamlContent = this.yamlEditor.getValue();
const config = jsyaml.load(yamlContent);
if (!config || typeof config !== 'object') {
throw new Error('Invalid YAML structure');
}
} else {
this.showAlert('error', result.error || 'Failed to save configuration');
if (!config.name) {
throw new Error('Model name is required');
}
if (!config.backend) {
throw new Error('Backend is required');
}
if (!config.parameters || !config.parameters.model) {
throw new Error('Model file/path is required in parameters.model');
}
this.showAlert('success', 'Configuration is valid!');
} catch (error) {
this.showAlert('error', 'Validation failed: ' + error.message);
}
} catch (error) {
this.showAlert('error', 'Failed to save: ' + error.message);
}
}
},
async saveConfig() {
try {
// Validate before saving
const yamlContent = this.yamlEditor.getValue();
const config = jsyaml.load(yamlContent);
if (!config || typeof config !== 'object') {
throw new Error('Invalid YAML structure');
}
if (!config.name) {
throw new Error('Model name is required');
}
if (!config.backend) {
throw new Error('Backend is required');
}
if (!config.parameters || !config.parameters.model) {
throw new Error('Model file/path is required in parameters.model');
}
const endpoint = this.isEditMode ? `/models/edit/{{.ModelName}}` : '/models/import';
const response = await fetch(endpoint, {
method: 'POST',
headers: {
'Content-Type': 'application/x-yaml',
},
body: yamlContent
});
formatYaml() {
try {
const result = await response.json();
if (result.success) {
this.showAlert('success', result.message || (this.isEditMode ? 'Model updated successfully!' : 'Model created successfully!'));
if (!this.isEditMode && config.name) {
setTimeout(() => {
window.location.href = `/models/edit/${config.name}`;
}, 2000);
}
} else {
this.showAlert('error', result.error || 'Failed to save configuration');
}
} catch (error) {
this.showAlert('error', 'Failed to save: ' + error.message);
}
},
formatYaml() {
try {
const yamlContent = this.yamlEditor.getValue();
const parsed = jsyaml.load(yamlContent);
const formatted = jsyaml.dump(parsed, {
indent: 2,
lineWidth: 120,
noRefs: true,
sortKeys: false
});
this.yamlEditor.setValue(formatted);
this.showAlert('success', 'YAML formatted successfully');
} catch (error) {
this.showAlert('error', 'Failed to format YAML: ' + error.message);
}
},
copyYaml() {
const yamlContent = this.yamlEditor.getValue();
const parsed = jsyaml.load(yamlContent);
const formatted = jsyaml.dump(parsed, {
indent: 2,
lineWidth: 120,
noRefs: true,
sortKeys: false
navigator.clipboard.writeText(yamlContent).then(() => {
this.showAlert('success', 'YAML copied to clipboard');
}).catch(err => {
// Fallback for older browsers
const textArea = document.createElement('textarea');
textArea.value = yamlContent;
document.body.appendChild(textArea);
textArea.select();
document.execCommand('copy');
document.body.removeChild(textArea);
this.showAlert('success', 'YAML copied to clipboard');
});
this.yamlEditor.setValue(formatted);
this.showAlert('success', 'YAML formatted successfully');
} catch (error) {
this.showAlert('error', 'Failed to format YAML: ' + error.message);
}
}
copyYaml() {
const yamlContent = this.yamlEditor.getValue();
navigator.clipboard.writeText(yamlContent).then(() => {
this.showAlert('success', 'YAML copied to clipboard');
}).catch(err => {
// Fallback for older browsers
const textArea = document.createElement('textarea');
textArea.value = yamlContent;
document.body.appendChild(textArea);
textArea.select();
document.execCommand('copy');
document.body.removeChild(textArea);
this.showAlert('success', 'YAML copied to clipboard');
});
}
showAlert(type, message) {
const container = document.getElementById('alertContainer');
const alertClasses = {
success: 'alert alert-success',
error: 'alert alert-error',
warning: 'alert alert-warning',
info: 'alert alert-info'
};
},
const alertIcons = {
success: 'fas fa-check-circle',
error: 'fas fa-exclamation-triangle',
warning: 'fas fa-exclamation-circle',
info: 'fas fa-info-circle'
};
container.innerHTML = `
<div class="${alertClasses[type]}">
<div class="flex items-center">
<i class="${alertIcons[type]} mr-3 text-lg"></i>
<span class="flex-1">${message}</span>
<button onclick="this.parentElement.parentElement.remove()" class="ml-4 text-current hover:opacity-70 transition-opacity">
<i class="fas fa-times"></i>
</button>
showAlert(type, message) {
const container = document.getElementById('alertContainer');
const alertClasses = {
success: 'alert alert-success',
error: 'alert alert-error',
warning: 'alert alert-warning',
info: 'alert alert-info'
};
const alertIcons = {
success: 'fas fa-check-circle',
error: 'fas fa-exclamation-triangle',
warning: 'fas fa-exclamation-circle',
info: 'fas fa-info-circle'
};
container.innerHTML = `
<div class="${alertClasses[type]}">
<div class="flex items-center">
<i class="${alertIcons[type]} mr-3 text-lg"></i>
<span class="flex-1">${message}</span>
<button onclick="this.parentElement.parentElement.remove()" class="ml-4 text-current hover:opacity-70 transition-opacity">
<i class="fas fa-times"></i>
</button>
</div>
</div>
</div>
`;
if (type === 'success' || type === 'info') {
setTimeout(() => {
const alert = container.querySelector('div');
if (alert) alert.remove();
}, 5000);
`;
if (type === 'success' || type === 'info') {
setTimeout(() => {
const alert = container.querySelector('div');
if (alert) alert.remove();
}, 5000);
}
}
}
clearAlert() {
document.getElementById('alertContainer').innerHTML = '';
}
}
// Initialize the editor when the page loads
let modelEditor;
document.addEventListener('DOMContentLoaded', () => {
modelEditor = new ModelEditor();
});
</script>
</body>

View File

@@ -4,13 +4,13 @@
<body class="bg-[#101827] text-[#E5E7EB]">
<div class="flex flex-col min-h-screen" x-data="modelsGallery()">
{{template "views/partials/navbar" .}}
<!-- Notifications -->
<div class="fixed top-20 right-4 z-50 space-y-2" style="max-width: 400px;">
<template x-for="notification in notifications" :key="notification.id">
<div x-show="true"
<div x-show="true"
x-transition:enter="transform ease-out duration-300 transition"
x-transition:enter-start="translate-x-full opacity-0"
x-transition:enter-end="translate-x-0 opacity-100"
@@ -31,7 +31,7 @@
</div>
</template>
</div>
<div class="container mx-auto px-4 py-8 flex-grow">
<!-- Hero Header -->
@@ -41,7 +41,7 @@
<div class="absolute inset-0 bg-gradient-to-r from-[#38BDF8]/20 to-[#8B5CF6]/20"></div>
<div class="absolute top-0 left-0 w-full h-full" style="background-image: radial-gradient(circle at 1px 1px, rgba(56,189,248,0.15) 1px, transparent 0); background-size: 20px 20px;"></div>
</div>
<div class="relative max-w-5xl mx-auto text-center">
<h1 class="text-4xl md:text-5xl font-bold text-[#E5E7EB] mb-4">
<span class="bg-clip-text text-transparent bg-gradient-to-r from-[#38BDF8] via-[#8B5CF6] to-[#38BDF8]">
@@ -62,7 +62,7 @@
<span class="font-semibold text-purple-300" x-text="repositories.length"></span>
<span class="text-gray-300 ml-1">repositories</span>
</div>
<a href="https://localai.io/models/" target="_blank"
<a href="https://localai.io/models/" target="_blank"
class="flex items-center bg-blue-600/80 hover:bg-blue-600 text-white px-4 py-2 rounded-full transition-all duration-300 hover:scale-105">
<i class="fas fa-info-circle mr-2"></i>
<span>Documentation</span>
@@ -71,13 +71,13 @@
</div>
</div>
</div>
{{template "views/partials/inprogress" .}}
<!-- Search and Filter Section -->
<div class="relative bg-gradient-to-br from-gray-800/80 to-gray-900/80 rounded-2xl p-8 mb-8 shadow-xl border border-gray-700/50 backdrop-blur-sm">
<div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-blue-500/5 to-purple-500/5"></div>
<div class="relative">
<!-- Search Input -->
<div class="mb-8">
@@ -89,11 +89,11 @@
<div class="absolute inset-y-0 start-0 flex items-center ps-4 pointer-events-none">
<i class="fas fa-search text-gray-400"></i>
</div>
<input
<input
x-model="searchTerm"
@input.debounce.500ms="fetchModels()"
class="w-full pl-12 pr-16 py-4 text-base font-normal text-gray-300 bg-gray-900/90 border border-gray-700/70 rounded-xl transition-all duration-300 focus:text-gray-200 focus:bg-gray-900 focus:border-blue-500 focus:ring-2 focus:ring-blue-500/50 focus:outline-none"
type="search"
class="w-full pl-12 pr-16 py-4 text-base font-normal text-gray-300 bg-gray-900/90 border border-gray-700/70 rounded-xl transition-all duration-300 focus:text-gray-200 focus:bg-gray-900 focus:border-blue-500 focus:ring-2 focus:ring-blue-500/50 focus:outline-none"
type="search"
placeholder="Search models by name, tag, or description...">
<span class="absolute right-4 top-4" x-show="loading">
<svg class="animate-spin h-6 w-6 text-blue-500" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
@@ -103,7 +103,7 @@
</span>
</div>
</div>
<!-- Filter by Type -->
<div class="mb-8">
<h3 class="text-lg font-semibold text-white mb-4 flex items-center">
@@ -153,7 +153,7 @@
</button>
</div>
</div>
<!-- Filter by Tags -->
<div x-show="allTags.length > 0">
<h3 class="text-lg font-semibold text-white mb-4 flex items-center">
@@ -210,12 +210,12 @@
<tr class="hover:bg-[#38BDF8]/10 transition-colors duration-200">
<!-- Icon -->
<td class="px-6 py-4">
<img :src="model.icon || 'https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg'"
<img :src="model.icon || 'https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg'"
class="w-12 h-12 object-cover rounded-lg border border-[#38BDF8]/30"
loading="lazy"
:alt="model.name">
</td>
<!-- Model Name -->
<td class="px-6 py-4">
<div class="flex flex-col">
@@ -228,12 +228,12 @@
</div>
</div>
</td>
<!-- Description -->
<td class="px-6 py-4">
<div class="text-sm text-[#94A3B8] max-w-xs truncate" x-text="model.description" :title="model.description"></div>
</td>
<!-- Repository -->
<td class="px-6 py-4">
<span class="inline-flex items-center text-xs px-2 py-1 rounded bg-[#38BDF8]/10 text-[#E5E7EB] border border-[#38BDF8]/30">
@@ -241,7 +241,7 @@
<span x-text="model.gallery"></span>
</span>
</td>
<!-- License -->
<td class="px-6 py-4">
<span x-show="model.license" class="inline-flex items-center text-xs px-2 py-1 rounded bg-[#8B5CF6]/10 text-[#E5E7EB] border border-[#8B5CF6]/30">
@@ -250,7 +250,7 @@
</span>
<span x-show="!model.license" class="text-xs text-[#94A3B8]">-</span>
</td>
<!-- Status -->
<td class="px-6 py-4">
<!-- Processing State -->
@@ -265,7 +265,7 @@
<div class="progress-bar-table" :style="'width:' + (jobProgress[model.jobID] || 0) + '%'"></div>
</div>
</div>
<!-- Installed State -->
<div x-show="!model.processing && model.installed">
<span class="inline-flex items-center text-xs px-2 py-1 rounded bg-green-500/20 text-green-300 border border-green-500/30">
@@ -273,7 +273,7 @@
Installed
</span>
</div>
<!-- Not Installed State -->
<div x-show="!model.processing && !model.installed">
<span class="inline-flex items-center text-xs px-2 py-1 rounded bg-[#1E293B] text-[#94A3B8] border border-[#38BDF8]/30">
@@ -282,42 +282,42 @@
</span>
</div>
</td>
<!-- Actions -->
<td class="px-6 py-4">
<div class="flex items-center justify-end gap-2">
<!-- Info Button -->
<button @click="openModal(model)"
<button @click="openModal(model)"
class="inline-flex items-center px-3 py-1.5 rounded-lg bg-[#1E293B] hover:bg-[#38BDF8]/20 text-xs font-medium text-[#E5E7EB] transition duration-200 border border-[#38BDF8]/30"
title="View details">
<i class="fas fa-info-circle"></i>
</button>
<!-- Installed State Actions -->
<template x-if="!model.processing && model.installed">
<div class="flex gap-2">
<button @click="reinstallModel(model.id)"
<button @click="reinstallModel(model.id)"
class="inline-flex items-center px-3 py-1.5 rounded-lg bg-[#38BDF8] hover:bg-[#38BDF8]/80 text-xs font-medium text-white transition duration-200"
title="Reinstall">
<i class="fa-solid fa-arrow-rotate-right"></i>
</button>
<button @click="deleteModel(model.id)"
<button @click="deleteModel(model.id)"
class="inline-flex items-center px-3 py-1.5 rounded-lg bg-red-600 hover:bg-red-700 text-xs font-medium text-white transition duration-200"
title="Delete">
<i class="fa-solid fa-trash"></i>
</button>
</div>
</template>
<!-- Not Installed State Actions -->
<template x-if="!model.processing && !model.installed">
<div class="flex gap-2">
<button @click="getConfig(model.id)"
<button @click="getConfig(model.id)"
class="inline-flex items-center px-3 py-1.5 rounded-lg bg-[#8B5CF6]/20 hover:bg-[#8B5CF6]/40 text-xs font-medium text-[#E5E7EB] transition duration-200 border border-[#8B5CF6]/30"
title="Get config">
<i class="fa-solid fa-file-code"></i>
</button>
<button @click="installModel(model.id)"
<button @click="installModel(model.id)"
class="inline-flex items-center px-3 py-1.5 rounded-lg bg-[#38BDF8] hover:bg-[#38BDF8]/80 text-xs font-medium text-white transition duration-200"
title="Install">
<i class="fa-solid fa-download"></i>
@@ -334,7 +334,7 @@
</div>
<!-- Modal -->
<div x-show="selectedModel"
<div x-show="selectedModel"
x-transition
@click.away="closeModal()"
class="fixed top-0 right-0 left-0 z-50 flex justify-center items-center w-full md:inset-0 h-full max-h-full bg-gray-900/50"
@@ -344,7 +344,7 @@
<!-- Modal Header -->
<div class="flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600">
<h3 class="text-xl font-semibold text-gray-900 dark:text-white" x-text="selectedModel?.name"></h3>
<button @click="closeModal()"
<button @click="closeModal()"
class="text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white">
<svg class="w-3 h-3" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
@@ -355,11 +355,11 @@
<!-- Modal Body -->
<div class="p-4 md:p-5 space-y-4 overflow-y-auto flex-1 min-h-0">
<div class="flex justify-center items-center">
<img :src="selectedModel?.icon || 'https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg'"
class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3"
<img :src="selectedModel?.icon || 'https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg'"
class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3"
loading="lazy">
</div>
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400" x-text="selectedModel?.description"></p>
<div class="text-base leading-relaxed text-gray-500 dark:text-gray-400 break-words max-w-full markdown-content" x-html="renderMarkdown(selectedModel?.description)"></div>
<hr>
<template x-if="selectedModel?.urls && selectedModel.urls.length > 0">
<div>
@@ -376,6 +376,21 @@
</ul>
</div>
</template>
<template x-if="selectedModel?.additionalFiles && selectedModel.additionalFiles.length > 0">
<div>
<p class="text-sm font-semibold text-gray-900 dark:text-white mb-2">Files</p>
<ul>
<template x-for="file in selectedModel.additionalFiles" :key="file">
<li class="mb-0">
<p class="text-base leading-tight text-gray-500 dark:text-gray-400">
<i class="fas fa-file pr-2"></i>
<span x-text="file.filename"></span>
</p>
</li>
</template>
</ul>
</div>
</template>
<template x-if="selectedModel?.tags && selectedModel.tags.length > 0">
<div>
<p class="text-sm mb-3 font-semibold text-gray-900 dark:text-white">Tags</p>
@@ -393,7 +408,7 @@
</div>
<!-- Modal Footer -->
<div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
<button @click="closeModal()"
<button @click="closeModal()"
class="py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700">
Close
</button>
@@ -406,7 +421,7 @@
<!-- Pagination -->
<div x-show="totalPages > 1" class="flex justify-center mt-12">
<div class="flex items-center gap-4 bg-gray-800/60 rounded-2xl p-4 backdrop-blur-sm border border-gray-700/50">
<button @click="goToPage(currentPage - 1)"
<button @click="goToPage(currentPage - 1)"
:disabled="currentPage <= 1"
:class="currentPage <= 1 ? 'opacity-50 cursor-not-allowed' : ''"
class="group flex items-center justify-center h-12 w-12 bg-gray-700/80 hover:bg-indigo-600 text-gray-300 hover:text-white rounded-xl shadow-lg transition-all duration-300 ease-in-out transform hover:scale-110">
@@ -418,7 +433,7 @@
<span class="text-gray-400">of</span>
<span class="text-white font-bold text-lg mx-2" x-text="totalPages"></span>
</div>
<button @click="goToPage(currentPage + 1)"
<button @click="goToPage(currentPage + 1)"
:disabled="currentPage >= totalPages"
:class="currentPage >= totalPages ? 'opacity-50 cursor-not-allowed' : ''"
class="group flex items-center justify-center h-12 w-12 bg-gray-700/80 hover:bg-indigo-600 text-gray-300 hover:text-white rounded-xl shadow-lg transition-all duration-300 ease-in-out transform hover:scale-110">
@@ -495,6 +510,42 @@ tbody tr:last-child td:first-child {
tbody tr:last-child td:last-child {
border-bottom-right-radius: 1rem;
}
/* Markdown content overflow handling */
.markdown-content {
word-wrap: break-word;
overflow-wrap: anywhere;
max-width: 100%;
}
.markdown-content pre {
overflow-x: auto;
max-width: 100%;
white-space: pre-wrap;
word-wrap: break-word;
}
.markdown-content code {
word-wrap: break-word;
overflow-wrap: break-word;
}
.markdown-content pre code {
white-space: pre;
overflow-x: auto;
display: block;
}
.markdown-content table {
max-width: 100%;
overflow-x: auto;
display: block;
}
.markdown-content img {
max-width: 100%;
height: auto;
}
</style>
<script>
@@ -511,24 +562,24 @@ function modelsGallery() {
selectedModel: null,
jobProgress: {},
notifications: [],
init() {
this.fetchModels();
// Poll for job progress every 600ms
setInterval(() => this.pollJobs(), 600);
},
addNotification(message, type = 'error') {
const id = Date.now();
this.notifications.push({ id, message, type });
// Auto-dismiss after 10 seconds
setTimeout(() => this.dismissNotification(id), 10000);
},
dismissNotification(id) {
this.notifications = this.notifications.filter(n => n.id !== id);
},
async fetchModels() {
this.loading = true;
try {
@@ -539,7 +590,7 @@ function modelsGallery() {
});
const response = await fetch(`/api/models?${params}`);
const data = await response.json();
this.models = data.models || [];
this.allTags = data.allTags || [];
this.repositories = data.repositories || [];
@@ -552,20 +603,20 @@ function modelsGallery() {
this.loading = false;
}
},
filterByTerm(term) {
this.searchTerm = term;
this.currentPage = 1;
this.fetchModels();
},
goToPage(page) {
if (page >= 1 && page <= this.totalPages) {
this.currentPage = page;
this.fetchModels();
}
},
async installModel(modelId) {
try {
const response = await fetch(`/api/models/install/${encodeURIComponent(modelId)}`, {
@@ -586,12 +637,12 @@ function modelsGallery() {
alert('Failed to start installation');
}
},
async deleteModel(modelId) {
if (!confirm('Are you sure you wish to delete the model?')) {
return;
}
try {
const response = await fetch(`/api/models/delete/${encodeURIComponent(modelId)}`, {
method: 'POST'
@@ -610,11 +661,11 @@ function modelsGallery() {
alert('Failed to start deletion');
}
},
async reinstallModel(modelId) {
this.installModel(modelId);
},
async getConfig(modelId) {
try {
const response = await fetch(`/api/models/config/${encodeURIComponent(modelId)}`, {
@@ -627,24 +678,24 @@ function modelsGallery() {
alert('Failed to get configuration');
}
},
async pollJobs() {
const processingModels = this.models.filter(m => m.processing && m.jobID);
for (const model of processingModels) {
try {
const response = await fetch(`/api/models/job/${model.jobID}`);
const jobData = await response.json();
// Handle queued status
if (jobData.queued) {
this.jobProgress[model.jobID] = 0;
// Keep processing state but don't show error
continue;
}
this.jobProgress[model.jobID] = jobData.progress || 0;
if (jobData.completed) {
model.processing = false;
model.installed = !jobData.deletion;
@@ -655,7 +706,7 @@ function modelsGallery() {
// Refresh the models list to get updated state
this.fetchModels();
}
if (jobData.error) {
model.processing = false;
delete this.jobProgress[model.jobID];
@@ -668,11 +719,25 @@ function modelsGallery() {
}
}
},
renderMarkdown(text) {
if (!text) return '';
try {
if (typeof marked === 'undefined' || typeof DOMPurify === 'undefined') {
return text; // Return plain text if libraries not loaded
}
const html = marked.parse(text);
return DOMPurify.sanitize(html);
} catch (error) {
console.error('Error rendering markdown:', error);
return text;
}
},
openModal(model) {
this.selectedModel = model;
},
closeModal() {
this.selectedModel = null;
}

View File

@@ -10,6 +10,110 @@
{{template "views/partials/inprogress" .}}
<div class="container mx-auto px-4 py-8 flex-grow">
{{ if eq .P2PToken "" }}
<!-- P2P Disabled - Wizard Guide -->
<div class="relative bg-[#1E293B]/80 border border-[#8B5CF6]/20 rounded-2xl p-12 shadow-xl backdrop-blur-sm">
<div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-purple-500/5 to-cyan-500/5"></div>
<div class="relative text-center max-w-4xl mx-auto">
<div class="inline-flex items-center justify-center w-20 h-20 rounded-full bg-[#8B5CF6]/10 border border-[#8B5CF6]/20 mb-6">
<i class="text-[#8B5CF6] text-3xl fas fa-circle-nodes"></i>
</div>
<h2 class="text-3xl md:text-4xl font-bold text-[#E5E7EB] mb-6">
<span class="bg-clip-text text-transparent bg-gradient-to-r from-[#38BDF8] to-[#8B5CF6]">
P2P Distribution Not Enabled
</span>
</h2>
<p class="text-xl text-[#94A3B8] mb-8 leading-relaxed">
Enable peer-to-peer distribution to scale your AI workloads across multiple devices. Share instances, shard models, and pool computational resources across your network.
</p>
<!-- Features Preview -->
<div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-10">
<div class="bg-[#101827] border border-[#38BDF8]/20 rounded-xl p-4">
<div class="w-10 h-10 bg-blue-500/20 rounded-lg flex items-center justify-center mx-auto mb-3">
<i class="fas fa-network-wired text-[#38BDF8] text-xl"></i>
</div>
<h3 class="text-sm font-semibold text-[#E5E7EB] mb-2">Instance Federation</h3>
<p class="text-xs text-[#94A3B8]">Load balance across multiple instances</p>
</div>
<div class="bg-[#101827] border border-[#8B5CF6]/20 rounded-xl p-4">
<div class="w-10 h-10 bg-purple-500/20 rounded-lg flex items-center justify-center mx-auto mb-3">
<i class="fas fa-puzzle-piece text-[#8B5CF6] text-xl"></i>
</div>
<h3 class="text-sm font-semibold text-[#E5E7EB] mb-2">Model Sharding</h3>
<p class="text-xs text-[#94A3B8]">Split large models across workers</p>
</div>
<div class="bg-[#101827] border border-green-500/20 rounded-xl p-4">
<div class="w-10 h-10 bg-green-500/20 rounded-lg flex items-center justify-center mx-auto mb-3">
<i class="fas fa-share-alt text-green-400 text-xl"></i>
</div>
<h3 class="text-sm font-semibold text-[#E5E7EB] mb-2">Resource Sharing</h3>
<p class="text-xs text-[#94A3B8]">Pool resources from multiple devices</p>
</div>
</div>
<!-- Setup Instructions -->
<div class="bg-[#101827] border border-[#8B5CF6]/20 rounded-xl p-6 mb-8 text-left">
<h3 class="text-lg font-bold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-rocket text-[#8B5CF6] mr-2"></i>
How to Enable P2P
</h3>
<div class="space-y-4">
<div class="flex items-start">
<div class="flex-shrink-0 w-8 h-8 rounded-full bg-[#8B5CF6]/20 flex items-center justify-center mr-3 mt-0.5">
<span class="text-[#8B5CF6] font-bold text-sm">1</span>
</div>
<div class="flex-1">
<p class="text-[#E5E7EB] font-medium mb-2">Start LocalAI with P2P enabled</p>
<code class="block bg-[#1E293B] text-[#38BDF8] p-3 rounded-lg text-sm border border-[#38BDF8]/20">
local-ai run --p2p
</code>
<p class="text-[#94A3B8] text-sm mt-2">This will automatically generate a network token for you.</p>
</div>
</div>
<div class="flex items-start">
<div class="flex-shrink-0 w-8 h-8 rounded-full bg-[#8B5CF6]/20 flex items-center justify-center mr-3 mt-0.5">
<span class="text-[#8B5CF6] font-bold text-sm">2</span>
</div>
<div class="flex-1">
<p class="text-[#E5E7EB] font-medium mb-2">Or use an existing token</p>
<code class="block bg-[#1E293B] text-[#38BDF8] p-3 rounded-lg text-sm border border-[#38BDF8]/20">
export TOKEN="your-token-here"<br>
local-ai run --p2p
</code>
<p class="text-[#94A3B8] text-sm mt-2">If you already have a token from another instance, you can reuse it.</p>
</div>
</div>
<div class="flex items-start">
<div class="flex-shrink-0 w-8 h-8 rounded-full bg-[#8B5CF6]/20 flex items-center justify-center mr-3 mt-0.5">
<span class="text-[#8B5CF6] font-bold text-sm">3</span>
</div>
<div class="flex-1">
<p class="text-[#E5E7EB] font-medium mb-2">Access the P2P dashboard</p>
<p class="text-[#94A3B8] text-sm">Once enabled, refresh this page to see your network token and start connecting nodes.</p>
</div>
</div>
</div>
</div>
<div class="flex flex-wrap justify-center gap-4">
<a href="https://localai.io/features/distribute/" target="_blank"
class="inline-flex items-center bg-[#8B5CF6] hover:bg-[#8B5CF6]/90 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 transform hover:scale-105 hover:shadow-[0_0_20px_rgba(139,92,246,0.4)]">
<i class="fas fa-book mr-2"></i>
Documentation
<i class="fas fa-external-link-alt ml-2 text-sm opacity-70"></i>
</a>
<a href="https://localai.io/basics/getting_started/" target="_blank"
class="inline-flex items-center bg-[#1E293B] hover:bg-[#1E293B]/80 border border-[#8B5CF6]/20 text-[#E5E7EB] py-3 px-6 rounded-xl font-semibold transition-all duration-300 transform hover:scale-105">
<i class="fas fa-graduation-cap mr-2"></i>
Getting Started
<i class="fas fa-external-link-alt ml-2 text-sm opacity-70"></i>
</a>
</div>
</div>
</div>
{{ else }}
<!-- P2P Enabled - Full Dashboard -->
<div class="workers mt-8">
<!-- Hero Section with Network Animation -->
<div class="animation-container mb-8">
@@ -129,22 +233,6 @@
The network token can be used to either share the instance or join a federation or a worker network. Below you will find examples on how to start a new instance or a worker with this token.
</p>
</div>
<!-- Warning box if p2p token is empty and p2p is enabled -->
{{ if eq .P2PToken "" }}
<div class="bg-gradient-to-r from-red-800/70 to-red-700/70 border border-red-600/50 p-6 rounded-xl shadow-lg mb-10 text-left">
<div class="flex items-center mb-2">
<i class="fa-solid fa-exclamation-triangle text-red-300 text-2xl mr-3"></i>
<h3 class="text-xl font-bold text-white">Warning: P2P token was not specified</h3>
</div>
<p class="mb-4 text-red-200">
You have to enable P2P mode by starting LocalAI with <code class="bg-red-900/50 px-2 py-0.5 rounded">--p2p</code>. Please restart the server with <code class="bg-red-900/50 px-2 py-0.5 rounded">--p2p</code> to generate a new token automatically that can be used to discover other nodes. If you already have a token, specify it with <code class="bg-red-900/50 px-2 py-0.5 rounded">export TOKEN=".."</code>
<a href="https://localai.io/features/distribute/" target="_blank" class="text-red-300 hover:text-red-200 underline underline-offset-2">
Check out the documentation for more information.
</a>
</p>
</div>
{{ else }}
<!-- Network Status Overview -->
<div class="grid grid-cols-1 lg:grid-cols-3 gap-6 mb-10">
@@ -507,13 +595,15 @@ docker run -ti --net host -e TOKEN="<span class="token">{{.P2PToken}}</span>" --
</div>
</div>
<!-- Llama.cpp Box END -->
{{ end }}
</div>
{{ end }}
</div>
{{template "views/partials/footer" .}}
</div>
{{ if ne .P2PToken "" }}
<script src="static/p2panimation.js"></script>
{{ end }}
<style>
.token {
@@ -575,6 +665,7 @@ docker run -ti --net host -e TOKEN="<span class="token">{{.P2PToken}}</span>" --
}
</style>
{{ if ne .P2PToken "" }}
<script>
function p2pNetwork() {
return {
@@ -623,6 +714,15 @@ function p2pNetwork() {
}
}
</script>
{{ else }}
<script>
function p2pNetwork() {
return {
// Empty component when P2P is disabled
}
}
</script>
{{ end }}
</body>
</html>

View File

@@ -120,8 +120,17 @@ function operationsStatus() {
throw new Error('Failed to fetch operations');
}
const data = await response.json();
const previousCount = this.operations.length;
this.operations = data.operations || [];
// If we had operations before and now we don't, refresh the page
if (previousCount > 0 && this.operations.length === 0) {
// Small delay to ensure the user sees the completion
setTimeout(() => {
window.location.reload();
}, 1000);
}
// Auto-collapse if there are many operations
if (this.operations.length > 5 && !this.collapsed) {
// Don't auto-collapse, let user control it

View File

@@ -73,7 +73,7 @@ func syncState(ctx context.Context, n *node.Node, app *application.Application)
continue
}
app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
ID: uuid.String(),
GalleryElementName: model,
Galleries: app.ApplicationConfig().Galleries,

View File

@@ -1,6 +1,7 @@
package schema
import (
"encoding/json"
"time"
gopsutil "github.com/shirou/gopsutil/v3/process"
@@ -157,3 +158,8 @@ type Detection struct {
Height float32 `json:"height"`
ClassName string `json:"class_name"`
}
type ImportModelRequest struct {
URI string `json:"uri"`
Preferences json.RawMessage `json:"preferences,omitempty"`
}

85
core/schema/message.go Normal file
View File

@@ -0,0 +1,85 @@
package schema
import (
"encoding/json"
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/pkg/grpc/proto"
)
type Message struct {
// The message role
Role string `json:"role,omitempty" yaml:"role"`
// The message name (used for tools calls)
Name string `json:"name,omitempty" yaml:"name"`
// The message content
Content interface{} `json:"content" yaml:"content"`
StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
StringVideos []string `json:"string_videos,omitempty" yaml:"string_videos,omitempty"`
StringAudios []string `json:"string_audios,omitempty" yaml:"string_audios,omitempty"`
// A result of a function call
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
ToolCalls []ToolCall `json:"tool_calls,omitempty" yaml:"tool_call,omitempty"`
}
type ToolCall struct {
Index int `json:"index"`
ID string `json:"id"`
Type string `json:"type"`
FunctionCall FunctionCall `json:"function"`
}
type FunctionCall struct {
Name string `json:"name,omitempty"`
Arguments string `json:"arguments"`
}
type Messages []Message
// MessagesToProto converts schema.Message slice to proto.Message slice
// It handles content conversion, tool_calls serialization, and optional fields
func (messages Messages) ToProto() []*proto.Message {
protoMessages := make([]*proto.Message, len(messages))
for i, message := range messages {
protoMessages[i] = &proto.Message{
Role: message.Role,
Name: message.Name, // needed by function calls
}
switch ct := message.Content.(type) {
case string:
protoMessages[i].Content = ct
case []interface{}:
// If using the tokenizer template, in case of multimodal we want to keep the multimodal content as and return only strings here
data, _ := json.Marshal(ct)
resultData := []struct {
Text string `json:"text"`
}{}
json.Unmarshal(data, &resultData)
for _, r := range resultData {
protoMessages[i].Content += r.Text
}
}
// Serialize tool_calls to JSON string if present
if len(message.ToolCalls) > 0 {
toolCallsJSON, err := json.Marshal(message.ToolCalls)
if err != nil {
log.Warn().Err(err).Msg("failed to marshal tool_calls to JSON")
} else {
protoMessages[i].ToolCalls = string(toolCallsJSON)
}
}
// Note: tool_call_id and reasoning_content are not in schema.Message yet
// They may need to be added to schema.Message if needed in the future
}
return protoMessages
}

265
core/schema/message_test.go Normal file
View File

@@ -0,0 +1,265 @@
package schema_test
import (
"encoding/json"
. "github.com/mudler/LocalAI/core/schema"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("LLM tests", func() {
Context("ToProtoMessages conversion", func() {
It("should convert basic message with string content", func() {
messages := Messages{
{
Role: "user",
Content: "Hello, world!",
},
}
protoMessages := messages.ToProto()
Expect(protoMessages).To(HaveLen(1))
Expect(protoMessages[0].Role).To(Equal("user"))
Expect(protoMessages[0].Content).To(Equal("Hello, world!"))
Expect(protoMessages[0].Name).To(BeEmpty())
Expect(protoMessages[0].ToolCalls).To(BeEmpty())
})
It("should convert message with nil content to empty string", func() {
messages := Messages{
{
Role: "assistant",
Content: nil,
},
}
protoMessages := messages.ToProto()
Expect(protoMessages).To(HaveLen(1))
Expect(protoMessages[0].Role).To(Equal("assistant"))
Expect(protoMessages[0].Content).To(Equal(""))
})
It("should convert message with array content (multimodal)", func() {
messages := Messages{
{
Role: "user",
Content: []interface{}{
map[string]interface{}{
"type": "text",
"text": "Hello",
},
map[string]interface{}{
"type": "text",
"text": " World",
},
},
},
}
protoMessages := messages.ToProto()
Expect(protoMessages).To(HaveLen(1))
Expect(protoMessages[0].Role).To(Equal("user"))
Expect(protoMessages[0].Content).To(Equal("Hello World"))
})
It("should convert message with tool_calls", func() {
messages := Messages{
{
Role: "assistant",
Content: "I'll call a function",
ToolCalls: []ToolCall{
{
Index: 0,
ID: "call_123",
Type: "function",
FunctionCall: FunctionCall{
Name: "get_weather",
Arguments: `{"location": "San Francisco"}`,
},
},
},
},
}
protoMessages := messages.ToProto()
Expect(protoMessages).To(HaveLen(1))
Expect(protoMessages[0].Role).To(Equal("assistant"))
Expect(protoMessages[0].Content).To(Equal("I'll call a function"))
Expect(protoMessages[0].ToolCalls).NotTo(BeEmpty())
// Verify tool_calls JSON is valid
var toolCalls []ToolCall
err := json.Unmarshal([]byte(protoMessages[0].ToolCalls), &toolCalls)
Expect(err).NotTo(HaveOccurred())
Expect(toolCalls).To(HaveLen(1))
Expect(toolCalls[0].ID).To(Equal("call_123"))
Expect(toolCalls[0].FunctionCall.Name).To(Equal("get_weather"))
})
It("should convert message with name field", func() {
messages := Messages{
{
Role: "tool",
Content: "Function result",
Name: "get_weather",
},
}
protoMessages := messages.ToProto()
Expect(protoMessages).To(HaveLen(1))
Expect(protoMessages[0].Role).To(Equal("tool"))
Expect(protoMessages[0].Content).To(Equal("Function result"))
Expect(protoMessages[0].Name).To(Equal("get_weather"))
})
It("should convert message with tool_calls and nil content", func() {
messages := Messages{
{
Role: "assistant",
Content: nil,
ToolCalls: []ToolCall{
{
Index: 0,
ID: "call_456",
Type: "function",
FunctionCall: FunctionCall{
Name: "search",
Arguments: `{"query": "test"}`,
},
},
},
},
}
protoMessages := messages.ToProto()
Expect(protoMessages).To(HaveLen(1))
Expect(protoMessages[0].Role).To(Equal("assistant"))
Expect(protoMessages[0].Content).To(Equal(""))
Expect(protoMessages[0].ToolCalls).NotTo(BeEmpty())
var toolCalls []ToolCall
err := json.Unmarshal([]byte(protoMessages[0].ToolCalls), &toolCalls)
Expect(err).NotTo(HaveOccurred())
Expect(toolCalls).To(HaveLen(1))
Expect(toolCalls[0].FunctionCall.Name).To(Equal("search"))
})
It("should convert multiple messages", func() {
messages := Messages{
{
Role: "user",
Content: "Hello",
},
{
Role: "assistant",
Content: "Hi there!",
},
{
Role: "user",
Content: "How are you?",
},
}
protoMessages := messages.ToProto()
Expect(protoMessages).To(HaveLen(3))
Expect(protoMessages[0].Role).To(Equal("user"))
Expect(protoMessages[0].Content).To(Equal("Hello"))
Expect(protoMessages[1].Role).To(Equal("assistant"))
Expect(protoMessages[1].Content).To(Equal("Hi there!"))
Expect(protoMessages[2].Role).To(Equal("user"))
Expect(protoMessages[2].Content).To(Equal("How are you?"))
})
It("should handle empty messages slice", func() {
messages := Messages{}
protoMessages := messages.ToProto()
Expect(protoMessages).To(HaveLen(0))
})
It("should handle message with all optional fields", func() {
messages := Messages{
{
Role: "assistant",
Content: "I'll help you",
Name: "test_tool",
ToolCalls: []ToolCall{
{
Index: 0,
ID: "call_789",
Type: "function",
FunctionCall: FunctionCall{
Name: "test_function",
Arguments: `{"param": "value"}`,
},
},
},
},
}
protoMessages := messages.ToProto()
Expect(protoMessages).To(HaveLen(1))
Expect(protoMessages[0].Role).To(Equal("assistant"))
Expect(protoMessages[0].Content).To(Equal("I'll help you"))
Expect(protoMessages[0].Name).To(Equal("test_tool"))
Expect(protoMessages[0].ToolCalls).NotTo(BeEmpty())
var toolCalls []ToolCall
err := json.Unmarshal([]byte(protoMessages[0].ToolCalls), &toolCalls)
Expect(err).NotTo(HaveOccurred())
Expect(toolCalls).To(HaveLen(1))
})
It("should handle message with empty string content", func() {
messages := Messages{
{
Role: "user",
Content: "",
},
}
protoMessages := messages.ToProto()
Expect(protoMessages).To(HaveLen(1))
Expect(protoMessages[0].Role).To(Equal("user"))
Expect(protoMessages[0].Content).To(Equal(""))
})
It("should handle message with array content containing non-text parts", func() {
messages := Messages{
{
Role: "user",
Content: []interface{}{
map[string]interface{}{
"type": "text",
"text": "Hello",
},
map[string]interface{}{
"type": "image",
"url": "https://example.com/image.jpg",
},
},
},
}
protoMessages := messages.ToProto()
Expect(protoMessages).To(HaveLen(1))
Expect(protoMessages[0].Role).To(Equal("user"))
// Should only extract text parts
Expect(protoMessages[0].Content).To(Equal("Hello"))
})
})
})

View File

@@ -50,7 +50,7 @@ type OpenAIResponse struct {
type Choice struct {
Index int `json:"index"`
FinishReason string `json:"finish_reason"`
FinishReason *string `json:"finish_reason"`
Message *Message `json:"message,omitempty"`
Delta *Message `json:"delta,omitempty"`
Text string `json:"text,omitempty"`
@@ -76,39 +76,6 @@ type InputAudio struct {
Data string `json:"data" yaml:"data"`
}
type Message struct {
// The message role
Role string `json:"role,omitempty" yaml:"role"`
// The message name (used for tools calls)
Name string `json:"name,omitempty" yaml:"name"`
// The message content
Content interface{} `json:"content" yaml:"content"`
StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
StringVideos []string `json:"string_videos,omitempty" yaml:"string_videos,omitempty"`
StringAudios []string `json:"string_audios,omitempty" yaml:"string_audios,omitempty"`
// A result of a function call
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
ToolCalls []ToolCall `json:"tool_calls,omitempty" yaml:"tool_call,omitempty"`
}
type ToolCall struct {
Index int `json:"index"`
ID string `json:"id"`
Type string `json:"type"`
FunctionCall FunctionCall `json:"function"`
}
type FunctionCall struct {
Name string `json:"name,omitempty"`
Arguments string `json:"arguments"`
}
type OpenAIModel struct {
ID string `json:"id"`
Object string `json:"object"`

View File

@@ -1,50 +1,51 @@
package schema
// @Description PredictionOptions contains prediction parameters for model inference
type PredictionOptions struct {
// Also part of the OpenAI official spec
BasicModelRequest `yaml:",inline"`
// Also part of the OpenAI official spec
Language string `json:"language"`
Language string `json:"language,omitempty" yaml:"language,omitempty"`
// Only for audio transcription
Translate bool `json:"translate"`
Translate bool `json:"translate,omitempty" yaml:"translate,omitempty"`
// Also part of the OpenAI official spec. use it for returning multiple results
N int `json:"n"`
N int `json:"n,omitempty" yaml:"n,omitempty"`
// Common options between all the API calls, part of the OpenAI spec
TopP *float64 `json:"top_p" yaml:"top_p"`
TopK *int `json:"top_k" yaml:"top_k"`
Temperature *float64 `json:"temperature" yaml:"temperature"`
Maxtokens *int `json:"max_tokens" yaml:"max_tokens"`
Echo bool `json:"echo"`
TopP *float64 `json:"top_p,omitempty" yaml:"top_p,omitempty"`
TopK *int `json:"top_k,omitempty" yaml:"top_k,omitempty"`
Temperature *float64 `json:"temperature,omitempty" yaml:"temperature,omitempty"`
Maxtokens *int `json:"max_tokens,omitempty" yaml:"max_tokens,omitempty"`
Echo bool `json:"echo,omitempty" yaml:"echo,omitempty"`
// Custom parameters - not present in the OpenAI API
Batch int `json:"batch" yaml:"batch"`
IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"`
RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
Batch int `json:"batch,omitempty" yaml:"batch,omitempty"`
IgnoreEOS bool `json:"ignore_eos,omitempty" yaml:"ignore_eos,omitempty"`
RepeatPenalty float64 `json:"repeat_penalty,omitempty" yaml:"repeat_penalty,omitempty"`
RepeatLastN int `json:"repeat_last_n" yaml:"repeat_last_n"`
RepeatLastN int `json:"repeat_last_n,omitempty" yaml:"repeat_last_n,omitempty"`
Keep int `json:"n_keep" yaml:"n_keep"`
Keep int `json:"n_keep,omitempty" yaml:"n_keep,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
TFZ *float64 `json:"tfz" yaml:"tfz"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty" yaml:"frequency_penalty,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty" yaml:"presence_penalty,omitempty"`
TFZ *float64 `json:"tfz,omitempty" yaml:"tfz,omitempty"`
TypicalP *float64 `json:"typical_p" yaml:"typical_p"`
Seed *int `json:"seed" yaml:"seed"`
TypicalP *float64 `json:"typical_p,omitempty" yaml:"typical_p,omitempty"`
Seed *int `json:"seed,omitempty" yaml:"seed,omitempty"`
NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"`
RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
RopeFreqScale float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
NegativePrompt string `json:"negative_prompt,omitempty" yaml:"negative_prompt,omitempty"`
RopeFreqBase float32 `json:"rope_freq_base,omitempty" yaml:"rope_freq_base,omitempty"`
RopeFreqScale float32 `json:"rope_freq_scale,omitempty" yaml:"rope_freq_scale,omitempty"`
NegativePromptScale float32 `json:"negative_prompt_scale,omitempty" yaml:"negative_prompt_scale,omitempty"`
// Diffusers
ClipSkip int `json:"clip_skip" yaml:"clip_skip"`
ClipSkip int `json:"clip_skip,omitempty" yaml:"clip_skip,omitempty"`
// RWKV (?)
Tokenizer string `json:"tokenizer" yaml:"tokenizer"`
Tokenizer string `json:"tokenizer,omitempty" yaml:"tokenizer,omitempty"`
}

View File

@@ -5,8 +5,9 @@ type LocalAIRequest interface {
ModelName(*string) string
}
// @Description BasicModelRequest contains the basic model request fields
type BasicModelRequest struct {
Model string `json:"model" yaml:"model"`
Model string `json:"model,omitempty" yaml:"model,omitempty"`
// TODO: Should this also include the following fields from the OpenAI side of the world?
// If so, changes should be made to core/http/middleware/request.go to match

View File

@@ -0,0 +1,13 @@
package schema_test
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestSchema(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "LocalAI Schema test suite")
}

View File

@@ -8,7 +8,7 @@ import (
"github.com/rs/zerolog/log"
)
func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend], systemState *system.SystemState) error {
func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend, any], systemState *system.SystemState) error {
utils.ResetDownloadTimers()
g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0})

View File

@@ -14,8 +14,8 @@ import (
type GalleryService struct {
appConfig *config.ApplicationConfig
sync.Mutex
ModelGalleryChannel chan GalleryOp[gallery.GalleryModel]
BackendGalleryChannel chan GalleryOp[gallery.GalleryBackend]
ModelGalleryChannel chan GalleryOp[gallery.GalleryModel, gallery.ModelConfig]
BackendGalleryChannel chan GalleryOp[gallery.GalleryBackend, any]
modelLoader *model.ModelLoader
statuses map[string]*GalleryOpStatus
@@ -24,8 +24,8 @@ type GalleryService struct {
func NewGalleryService(appConfig *config.ApplicationConfig, ml *model.ModelLoader) *GalleryService {
return &GalleryService{
appConfig: appConfig,
ModelGalleryChannel: make(chan GalleryOp[gallery.GalleryModel]),
BackendGalleryChannel: make(chan GalleryOp[gallery.GalleryBackend]),
ModelGalleryChannel: make(chan GalleryOp[gallery.GalleryModel, gallery.ModelConfig]),
BackendGalleryChannel: make(chan GalleryOp[gallery.GalleryBackend, any]),
modelLoader: ml,
statuses: make(map[string]*GalleryOpStatus),
}

View File

@@ -9,10 +9,11 @@ import (
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
)
func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel], cl *config.ModelConfigLoader, systemState *system.SystemState) error {
func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel, gallery.ModelConfig], cl *config.ModelConfigLoader, systemState *system.SystemState) error {
utils.ResetDownloadTimers()
g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0})
@@ -118,28 +119,32 @@ func ApplyGalleryFromString(systemState *system.SystemState, modelLoader *model.
// processModelOperation handles the installation or deletion of a model
func processModelOperation(
op *GalleryOp[gallery.GalleryModel],
op *GalleryOp[gallery.GalleryModel, gallery.ModelConfig],
systemState *system.SystemState,
modelLoader *model.ModelLoader,
enforcePredownloadScans bool,
automaticallyInstallBackend bool,
progressCallback func(string, string, string, float64),
) error {
// delete a model
if op.Delete {
switch {
case op.Delete:
return gallery.DeleteModelFromSystem(systemState, op.GalleryElementName)
}
// if the request contains a gallery name, we apply the gallery from the gallery list
if op.GalleryElementName != "" {
case op.GalleryElement != nil:
installedModel, err := gallery.InstallModel(
systemState, op.GalleryElement.Name,
op.GalleryElement,
op.Req.Overrides,
progressCallback, enforcePredownloadScans)
if automaticallyInstallBackend && installedModel.Backend != "" {
log.Debug().Msgf("Installing backend %q", installedModel.Backend)
if err := gallery.InstallBackendFromGallery(op.BackendGalleries, systemState, modelLoader, installedModel.Backend, progressCallback, false); err != nil {
return err
}
}
return err
case op.GalleryElementName != "":
return gallery.InstallModelFromGallery(op.Galleries, op.BackendGalleries, systemState, modelLoader, op.GalleryElementName, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend)
// } else if op.ConfigURL != "" {
// err := startup.InstallModels(op.Galleries, modelPath, enforcePredownloadScans, progressCallback, op.ConfigURL)
// if err != nil {
// return err
// }
// return cl.Preload(modelPath)
} else {
default:
return installModelFromRemoteConfig(systemState, modelLoader, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend, op.BackendGalleries)
}
}

View File

@@ -5,12 +5,16 @@ import (
"github.com/mudler/LocalAI/pkg/xsync"
)
type GalleryOp[T any] struct {
type GalleryOp[T any, E any] struct {
ID string
GalleryElementName string
Delete bool
Req T
Req T
// If specified, we install directly the gallery element
GalleryElement *E
Galleries []config.Gallery
BackendGalleries []config.Gallery
}

View File

@@ -1,15 +1,20 @@
package startup
import (
"encoding/json"
"errors"
"fmt"
"os"
"path"
"path/filepath"
"strings"
"time"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/gallery/importers"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
@@ -25,7 +30,7 @@ const (
// InstallModels will preload models from the given list of URLs and galleries
// It will download the model if it is not already present in the model path
// It will also try to resolve if the model is an embedded model YAML configuration
func InstallModels(galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
func InstallModels(galleryService *services.GalleryService, galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
// create an error that groups all errors
var err error
@@ -154,7 +159,50 @@ func InstallModels(galleries, backendGalleries []config.Gallery, systemState *sy
err = errors.Join(err, e)
} else if !found {
log.Warn().Msgf("[startup] failed resolving model '%s'", url)
err = errors.Join(err, fmt.Errorf("failed resolving model '%s'", url))
if galleryService == nil {
err = errors.Join(err, fmt.Errorf("cannot start autoimporter, not sure how to handle this uri"))
continue
}
// TODO: we should just use the discoverModelConfig here and default to this.
modelConfig, discoverErr := importers.DiscoverModelConfig(url, json.RawMessage{})
if discoverErr != nil {
err = errors.Join(discoverErr, fmt.Errorf("failed to discover model config: %w", err))
continue
}
uuid, uuidErr := uuid.NewUUID()
if uuidErr != nil {
err = errors.Join(uuidErr, fmt.Errorf("failed to generate UUID: %w", uuidErr))
continue
}
galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Req: gallery.GalleryModel{
Overrides: map[string]interface{}{},
},
ID: uuid.String(),
GalleryElementName: modelConfig.Name,
GalleryElement: &modelConfig,
BackendGalleries: backendGalleries,
}
var status *services.GalleryOpStatus
// wait for op to finish
for {
status = galleryService.GetStatus(uuid.String())
if status != nil && status.Processed {
break
}
time.Sleep(1 * time.Second)
}
if status.Error != nil {
return status.Error
}
log.Info().Msgf("[startup] imported model '%s' from '%s'", modelConfig.Name, url)
}
}
}

View File

@@ -33,7 +33,7 @@ var _ = Describe("Preload test", func() {
url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml"
fileName := fmt.Sprintf("%s.yaml", "phi-2")
InstallModels([]config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
InstallModels(nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
resultFile := filepath.Join(tmpdir, fileName)
@@ -46,7 +46,7 @@ var _ = Describe("Preload test", func() {
url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K")
err := InstallModels([]config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
err := InstallModels(nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
Expect(err).ToNot(HaveOccurred())
resultFile := filepath.Join(tmpdir, fileName)

View File

@@ -11,9 +11,6 @@ import (
"github.com/mudler/LocalAI/pkg/utils"
"github.com/Masterminds/sprig/v3"
"github.com/nikolalohinski/gonja/v2"
"github.com/nikolalohinski/gonja/v2/exec"
)
// Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go?
@@ -21,17 +18,15 @@ import (
type TemplateType int
type templateCache struct {
mu sync.Mutex
templatesPath string
templates map[TemplateType]map[string]*template.Template
jinjaTemplates map[TemplateType]map[string]*exec.Template
mu sync.Mutex
templatesPath string
templates map[TemplateType]map[string]*template.Template
}
func newTemplateCache(templatesPath string) *templateCache {
tc := &templateCache{
templatesPath: templatesPath,
templates: make(map[TemplateType]map[string]*template.Template),
jinjaTemplates: make(map[TemplateType]map[string]*exec.Template),
templatesPath: templatesPath,
templates: make(map[TemplateType]map[string]*template.Template),
}
return tc
}
@@ -85,78 +80,6 @@ func (tc *templateCache) loadTemplateIfExists(templateType TemplateType, templat
return nil
}
func (tc *templateCache) initializeJinjaTemplateMapKey(tt TemplateType) {
if _, ok := tc.jinjaTemplates[tt]; !ok {
tc.jinjaTemplates[tt] = make(map[string]*exec.Template)
}
}
func (tc *templateCache) loadJinjaTemplateIfExists(templateType TemplateType, templateName string) error {
// Check if the template was already loaded
if _, ok := tc.jinjaTemplates[templateType][templateName]; ok {
return nil
}
// Check if the model path exists
// skip any error here - we run anyway if a template does not exist
modelTemplateFile := fmt.Sprintf("%s.tmpl", templateName)
dat := ""
file := filepath.Join(tc.templatesPath, modelTemplateFile)
// Security check
if err := utils.VerifyPath(modelTemplateFile, tc.templatesPath); err != nil {
return fmt.Errorf("template file outside path: %s", file)
}
// can either be a file in the system or a string with the template
if utils.ExistsInPath(tc.templatesPath, modelTemplateFile) {
d, err := os.ReadFile(file)
if err != nil {
return err
}
dat = string(d)
} else {
dat = templateName
}
tmpl, err := gonja.FromString(dat)
if err != nil {
return err
}
tc.jinjaTemplates[templateType][templateName] = tmpl
return nil
}
func (tc *templateCache) evaluateJinjaTemplate(templateType TemplateType, templateNameOrContent string, in map[string]interface{}) (string, error) {
tc.mu.Lock()
defer tc.mu.Unlock()
tc.initializeJinjaTemplateMapKey(templateType)
m, ok := tc.jinjaTemplates[templateType][templateNameOrContent]
if !ok {
// return "", fmt.Errorf("template not loaded: %s", templateName)
loadErr := tc.loadJinjaTemplateIfExists(templateType, templateNameOrContent)
if loadErr != nil {
return "", loadErr
}
m = tc.jinjaTemplates[templateType][templateNameOrContent] // ok is not important since we check m on the next line, and wealready checked
}
if m == nil {
return "", fmt.Errorf("failed loading a template for %s", templateNameOrContent)
}
var buf bytes.Buffer
data := exec.NewContext(in)
if err := m.Execute(&buf, data); err != nil {
return "", err
}
return buf.String(), nil
}
func (tc *templateCache) evaluateTemplate(templateType TemplateType, templateNameOrContent string, in interface{}) (string, error) {
tc.mu.Lock()
defer tc.mu.Unlock()

View File

@@ -86,10 +86,6 @@ func (e *Evaluator) EvaluateTemplateForPrompt(templateType TemplateType, config
return in.Input, nil
}
if config.TemplateConfig.JinjaTemplate {
return e.evaluateJinjaTemplateForPrompt(templateType, template, in)
}
return e.cache.evaluateTemplate(templateType, template, in)
}
@@ -97,72 +93,7 @@ func (e *Evaluator) evaluateTemplateForChatMessage(templateName string, messageD
return e.cache.evaluateTemplate(ChatMessageTemplate, templateName, messageData)
}
func (e *Evaluator) templateJinjaChat(templateName string, messageData []ChatMessageTemplateData, funcs []functions.Function) (string, error) {
conversation := make(map[string]interface{})
messages := make([]map[string]interface{}, len(messageData))
// convert from ChatMessageTemplateData to what the jinja template expects
for _, message := range messageData {
// TODO: this seems to cover minimum text templates. Can be expanded to cover more complex interactions
var data []byte
data, _ = json.Marshal(message.FunctionCall)
messages = append(messages, map[string]interface{}{
"role": message.RoleName,
"content": message.Content,
"tool_call": string(data),
})
}
conversation["messages"] = messages
// if tools are detected, add these
if len(funcs) > 0 {
conversation["tools"] = funcs
}
return e.cache.evaluateJinjaTemplate(ChatMessageTemplate, templateName, conversation)
}
func (e *Evaluator) evaluateJinjaTemplateForPrompt(templateType TemplateType, templateName string, in PromptTemplateData) (string, error) {
conversation := make(map[string]interface{})
conversation["system_prompt"] = in.SystemPrompt
conversation["content"] = in.Input
return e.cache.evaluateJinjaTemplate(templateType, templateName, conversation)
}
func (e *Evaluator) TemplateMessages(input schema.OpenAIRequest, messages []schema.Message, config *config.ModelConfig, funcs []functions.Function, shouldUseFn bool) string {
if config.TemplateConfig.JinjaTemplate {
var messageData []ChatMessageTemplateData
for messageIndex, i := range messages {
fcall := i.FunctionCall
if len(i.ToolCalls) > 0 {
fcall = i.ToolCalls
}
messageData = append(messageData, ChatMessageTemplateData{
SystemPrompt: config.SystemPrompt,
Role: config.Roles[i.Role],
RoleName: i.Role,
Content: i.StringContent,
FunctionCall: fcall,
FunctionName: i.Name,
LastMessage: messageIndex == (len(messages) - 1),
Function: config.Grammar != "" && (messageIndex == (len(messages) - 1)),
MessageIndex: messageIndex,
})
}
templatedInput, err := e.templateJinjaChat(config.TemplateConfig.ChatMessage, messageData, funcs)
if err == nil {
return templatedInput
}
}
var predInput string
suppressConfigSystemPrompt := false
mess := []string{}

View File

@@ -191,25 +191,6 @@ var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]in
},
}
var jinjaTest map[string]map[string]interface{} = map[string]map[string]interface{}{
"user": {
"expected": "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"config": &config.ModelConfig{
TemplateConfig: config.TemplateConfig{
ChatMessage: toolCallJinja,
JinjaTemplate: true,
},
},
"functions": []functions.Function{},
"shouldUseFn": false,
"messages": []schema.Message{
{
Role: "user",
StringContent: "A long time ago in a galaxy far, far away...",
},
},
},
}
var _ = Describe("Templates", func() {
Context("chat message ChatML", func() {
var evaluator *Evaluator
@@ -237,17 +218,4 @@ var _ = Describe("Templates", func() {
})
}
})
Context("chat message jinja", func() {
var evaluator *Evaluator
BeforeEach(func() {
evaluator = NewEvaluator("")
})
for key := range jinjaTest {
foo := jinjaTest[key]
It("renders correctly `"+key+"`", func() {
templated := evaluator.TemplateMessages(schema.OpenAIRequest{}, foo["messages"].([]schema.Message), foo["config"].(*config.ModelConfig), foo["functions"].([]functions.Function), foo["shouldUseFn"].(bool))
Expect(templated).To(Equal(foo["expected"]), templated)
})
}
})
})

View File

@@ -128,16 +128,44 @@ Models can be also preloaded or downloaded on demand. To learn about model galle
#### YAML configuration
To use the `llama.cpp` backend, specify `llama` as the backend in the YAML file:
To use the `llama.cpp` backend, specify `llama-cpp` as the backend in the YAML file:
```yaml
name: llama
backend: llama
backend: llama-cpp
parameters:
# Relative to the models path
model: file.gguf
```
#### Backend Options
The `llama.cpp` backend supports additional configuration options that can be specified in the `options` field of your model YAML configuration. These options allow fine-tuning of the backend behavior:
| Option | Type | Description | Example |
|--------|------|-------------|---------|
| `use_jinja` or `jinja` | boolean | Enable Jinja2 template processing for chat templates. When enabled, the backend uses Jinja2-based chat templates from the model for formatting messages. | `use_jinja:true` |
| `context_shift` | boolean | Enable context shifting, which allows the model to dynamically adjust context window usage. | `context_shift:true` |
| `cache_ram` | integer | Set the maximum RAM cache size in MiB for KV cache. Use `-1` for unlimited (default). | `cache_ram:2048` |
| `parallel` or `n_parallel` | integer | Enable parallel request processing. When set to a value greater than 1, enables continuous batching for handling multiple requests concurrently. | `parallel:4` |
| `grpc_servers` or `rpc_servers` | string | Comma-separated list of gRPC server addresses for distributed inference. Allows distributing workload across multiple llama.cpp workers. | `grpc_servers:localhost:50051,localhost:50052` |
**Example configuration with options:**
```yaml
name: llama-model
backend: llama
parameters:
model: model.gguf
options:
- use_jinja:true
- context_shift:true
- cache_ram:4096
- parallel:2
```
**Note:** The `parallel` option can also be set via the `LLAMACPP_PARALLEL` environment variable, and `grpc_servers` can be set via the `LLAMACPP_GRPC_SERVERS` environment variable. Options specified in the YAML file take precedence over environment variables.
#### Reference
- [llama](https://github.com/ggerganov/llama.cpp)

View File

@@ -31,10 +31,14 @@ See [Installer]({{% relref "docs/advanced/installer" %}}) for all the supported
### macOS Download
For MacOS a DMG is available:
<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
<img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
</a>
> Note: the DMGs are not signed by Apple and shows quarantined after install. See https://github.com/mudler/LocalAI/issues/6268 for a workaround, fix is tracked here: https://github.com/mudler/LocalAI/issues/6244
### Run with docker
{{% alert icon="💡" %}}

View File

@@ -51,19 +51,20 @@ LocalAI is more than just a single tool - it's a complete ecosystem:
## Getting Started
The fastest way to get started is with our one-line installer:
```bash
curl https://localai.io/install.sh | sh
```
### macOS Download
You can use the DMG application for Mac:
<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
<img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
</a>
Or use Docker for a quick start:
> Note: the DMGs are not signed by Apple shows as quarantined. See https://github.com/mudler/LocalAI/issues/6268 for a workaround, fix is tracked here: https://github.com/mudler/LocalAI/issues/6244
## Docker
You can use Docker for a quick start:
```bash
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
@@ -71,6 +72,14 @@ docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
For more detailed installation options and configurations, see our [Getting Started guide](/basics/getting_started/).
## One-liner
The fastest way to get started is with our one-line installer (Linux):
```bash
curl https://localai.io/install.sh | sh
```
## Key Features
- **Text Generation**: Run various LLMs locally

View File

@@ -9,11 +9,15 @@ LocalAI binaries are available for both Linux and MacOS platforms and can be exe
### macOS Download
You can download the DMG and install the application:
<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
<img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
</a>
Use the following one-liner command in your terminal to download and run LocalAI on Linux or MacOS:
> Note: the DMGs are not signed by Apple as quarantined. See https://github.com/mudler/LocalAI/issues/6268 for a workaround, fix is tracked here: https://github.com/mudler/LocalAI/issues/6244
Otherwise, use the following one-liner command in your terminal to download and run LocalAI on Linux or MacOS:
```bash
curl -Lo local-ai "https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-$(uname -s)-$(uname -m)" && chmod +x local-ai && ./local-ai

View File

@@ -1,3 +1,3 @@
{
"version": "v3.6.0"
"version": "v3.7.0"
}

View File

@@ -1,5 +1,3 @@
module github.com/McShelby/hugo-theme-relearn.git
go 1.19
require github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200 // indirect

View File

@@ -1,4 +0,0 @@
github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200 h1:SmpwwN3DNzJWbV+IT8gaFu07ENUFpCvKou5BHYUKuVs=
github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200/go.mod h1:kx8MBj9T7SFR8ZClWvKZPmmUxBaltkoXvnWlZZcSnYA=
github.com/gohugoio/hugo-mod-jslibs-dist/popperjs/v2 v2.21100.20000/go.mod h1:mFberT6ZtcchrsDtfvJM7aAH2bDKLdOnruUHl0hlapI=
github.com/twbs/bootstrap v5.3.2+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0=

View File

File diff suppressed because it is too large Load Diff

View File

@@ -6,15 +6,20 @@ config_file: |
backend: "llama-cpp"
template:
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
{{ else if eq .RoleName "tool" -}}
<|im_start|>{{if eq .RoleName "tool" }}user{{else}}{{ .RoleName }}{{end}}
{{ if eq .RoleName "tool" -}}
<tool_response>
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if eq .RoleName "tool" -}}
</tool_response>
{{ end -}}
{{ if .FunctionCall -}}
<tool_call>
{{toJson .FunctionCall}}
</tool_call>
{{ end -}}<|im_end|>
function: |
<|im_start|>system

48
go.mod
View File

@@ -10,9 +10,9 @@ require (
github.com/Masterminds/sprig/v3 v3.3.0
github.com/alecthomas/kong v1.12.1
github.com/charmbracelet/glamour v0.10.0
github.com/containerd/containerd v1.7.28
github.com/containerd/containerd v1.7.29
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2
github.com/ebitengine/purego v0.9.0
github.com/ebitengine/purego v0.9.1
github.com/fsnotify/fsnotify v1.9.0
github.com/go-audio/wav v1.1.0
github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46
@@ -26,19 +26,18 @@ require (
github.com/gpustack/gguf-parser-go v0.22.1
github.com/hpcloud/tail v1.0.0
github.com/ipfs/go-log v1.0.5
github.com/jaypipes/ghw v0.19.1
github.com/jaypipes/ghw v0.20.0
github.com/joho/godotenv v1.5.1
github.com/klauspost/cpuid/v2 v2.3.0
github.com/libp2p/go-libp2p v0.43.0
github.com/lithammer/fuzzysearch v1.1.8
github.com/mholt/archiver/v3 v3.5.1
github.com/microcosm-cc/bluemonday v1.0.27
github.com/modelcontextprotocol/go-sdk v1.0.0
github.com/mudler/cogito v0.4.0
github.com/mudler/edgevpn v0.31.0
github.com/modelcontextprotocol/go-sdk v1.1.0
github.com/mudler/cogito v0.5.1
github.com/mudler/edgevpn v0.31.1
github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82
github.com/nikolalohinski/gonja/v2 v2.4.1
github.com/onsi/ginkgo/v2 v2.26.0
github.com/onsi/ginkgo/v2 v2.27.2
github.com/onsi/gomega v1.38.2
github.com/otiai10/copy v1.14.1
github.com/otiai10/openaigo v1.7.0
@@ -52,14 +51,15 @@ require (
github.com/streamer45/silero-vad-go v0.2.1
github.com/stretchr/testify v1.11.1
github.com/swaggo/swag v1.16.6
github.com/testcontainers/testcontainers-go v0.38.0
github.com/testcontainers/testcontainers-go v0.40.0
github.com/tmc/langchaingo v0.1.14
github.com/valyala/fasthttp v1.55.0
github.com/valyala/fasthttp v1.68.0
go.opentelemetry.io/otel v1.38.0
go.opentelemetry.io/otel/exporters/prometheus v0.60.0
go.opentelemetry.io/otel/metric v1.38.0
go.opentelemetry.io/otel/sdk/metric v1.38.0
google.golang.org/grpc v1.76.0
google.golang.org/protobuf v1.36.8
gopkg.in/yaml.v2 v2.4.0
gopkg.in/yaml.v3 v3.0.1
oras.land/oras-go/v2 v2.6.0
@@ -78,7 +78,6 @@ require (
github.com/containerd/platforms v0.2.1 // indirect
github.com/cpuguy83/dockercfg v0.3.2 // indirect
github.com/distribution/reference v0.6.0 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/fasthttp/websocket v1.5.8 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fredbi/uri v1.1.1 // indirect
@@ -135,7 +134,7 @@ require (
github.com/prometheus/otlptranslator v0.0.2 // indirect
github.com/rymdport/portal v0.4.2 // indirect
github.com/savsgio/gotils v0.0.0-20240303185622-093b76447511 // indirect
github.com/shirou/gopsutil/v4 v4.25.5 // indirect
github.com/shirou/gopsutil/v4 v4.25.6 // indirect
github.com/srwiley/oksvg v0.0.0-20221011165216-be6e8873101c // indirect
github.com/srwiley/rasterx v0.0.0-20220730225603-2ab79fcdd4ef // indirect
github.com/wlynxg/anet v0.0.5 // indirect
@@ -143,13 +142,13 @@ require (
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
go.uber.org/automaxprocs v1.6.0 // indirect
go.uber.org/mock v0.5.2 // indirect
go.yaml.in/yaml/v2 v2.4.2 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/image v0.25.0 // indirect
golang.org/x/oauth2 v0.30.0 // indirect
golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053 // indirect
golang.org/x/time v0.12.0 // indirect
google.golang.org/protobuf v1.36.8 // indirect
)
require (
@@ -160,7 +159,7 @@ require (
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/Microsoft/hcsshim v0.11.7 // indirect
github.com/alecthomas/chroma/v2 v2.14.0 // indirect
github.com/andybalholm/brotli v1.1.0 // indirect
github.com/andybalholm/brotli v1.2.0 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/benbjohnson/clock v1.3.5 // indirect
@@ -180,7 +179,7 @@ require (
github.com/dlclark/regexp2 v1.11.0 // indirect
github.com/docker/cli v27.0.3+incompatible // indirect
github.com/docker/distribution v2.8.2+incompatible // indirect
github.com/docker/docker v28.5.1+incompatible
github.com/docker/docker v28.5.2+incompatible
github.com/docker/docker-credential-helpers v0.7.0 // indirect
github.com/docker/go-connections v0.6.0
github.com/docker/go-units v0.5.0 // indirect
@@ -222,7 +221,7 @@ require (
github.com/jaypipes/pcidb v1.1.1 // indirect
github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/klauspost/compress v1.18.1 // indirect
github.com/klauspost/pgzip v1.2.5 // indirect
github.com/koron/go-ssdp v0.0.6 // indirect
github.com/libp2p/go-buffer-pool v0.1.0 // indirect
@@ -303,7 +302,6 @@ require (
github.com/tklauser/numcpus v0.10.0 // indirect
github.com/ulikunitz/xz v0.5.14 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/tcplisten v1.0.0 // indirect
github.com/vbatts/tar-split v0.11.3 // indirect
github.com/vishvananda/netlink v1.3.0 // indirect
github.com/vishvananda/netns v0.0.5 // indirect
@@ -319,15 +317,15 @@ require (
go.uber.org/fx v1.24.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0 // indirect
golang.org/x/crypto v0.41.0 // indirect
golang.org/x/crypto v0.43.0 // indirect
golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476 // indirect
golang.org/x/mod v0.27.0 // indirect
golang.org/x/net v0.43.0 // indirect
golang.org/x/sync v0.16.0 // indirect
golang.org/x/mod v0.28.0 // indirect
golang.org/x/net v0.46.0 // indirect
golang.org/x/sync v0.17.0 // indirect
golang.org/x/sys v0.37.0 // indirect
golang.org/x/term v0.34.0 // indirect
golang.org/x/text v0.28.0 // indirect
golang.org/x/tools v0.36.0 // indirect
golang.org/x/term v0.36.0 // indirect
golang.org/x/text v0.30.0 // indirect
golang.org/x/tools v0.37.0 // indirect
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb // indirect
golang.zx2c4.com/wireguard/windows v0.5.3 // indirect

102
go.sum
View File

@@ -23,8 +23,6 @@ github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg
github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ=
github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
@@ -44,8 +42,8 @@ github.com/alecthomas/kong v1.12.1/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruP
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
@@ -89,8 +87,8 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM=
github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw=
github.com/containerd/containerd v1.7.28 h1:Nsgm1AtcmEh4AHAJ4gGlNSaKgXiNccU270Dnf81FQ3c=
github.com/containerd/containerd v1.7.28/go.mod h1:azUkWcOvHrWvaiUjSQH0fjzuHIwSPg1WL5PshGP4Szs=
github.com/containerd/containerd v1.7.29 h1:90fWABQsaN9mJhGkoVnuzEY+o1XDPbg9BTC9QTAHnuE=
github.com/containerd/containerd v1.7.29/go.mod h1:azUkWcOvHrWvaiUjSQH0fjzuHIwSPg1WL5PshGP4Szs=
github.com/containerd/continuity v0.4.4 h1:/fNVfTJ7wIl/YPMHjf+5H32uFhl63JucB34PlCpMKII=
github.com/containerd/continuity v0.4.4/go.mod h1:/lNJvtJKUQStBzpVQ1+rasXO1LAWtUQssk28EZvJ3nE=
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
@@ -135,8 +133,8 @@ github.com/docker/cli v27.0.3+incompatible h1:usGs0/BoBW8MWxGeEtqPMkzOY56jZ6kYlS
github.com/docker/cli v27.0.3+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/distribution v2.8.2+incompatible h1:T3de5rq0dB1j30rp0sA2rER+m322EBzniBPB6ZIzuh8=
github.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
github.com/docker/docker v28.5.1+incompatible h1:Bm8DchhSD2J6PsFzxC35TZo4TLGR2PdW/E69rU45NhM=
github.com/docker/docker v28.5.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/docker v28.5.2+incompatible h1:DBX0Y0zAjZbSrm1uzOkdr1onVghKaftjlSWt4AFexzM=
github.com/docker/docker v28.5.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryefjlVRqWqIhk/uXJp0A=
github.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0=
github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94=
@@ -147,10 +145,8 @@ github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/ebitengine/purego v0.9.0 h1:mh0zpKBIXDceC63hpvPuGLiJ8ZAa3DfrFTudmfi8A4k=
github.com/ebitengine/purego v0.9.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
github.com/ebitengine/purego v0.9.1 h1:a/k2f2HQU3Pi399RPW1MOaZyhKJL9w/xFpKAg4q1s0A=
github.com/ebitengine/purego v0.9.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
@@ -186,8 +182,8 @@ github.com/gkampitakis/ciinfo v0.3.2 h1:JcuOPk8ZU7nZQjdUhctuhQofk7BGHuIy0c9Ez8BN
github.com/gkampitakis/ciinfo v0.3.2/go.mod h1:1NIwaOcFChN4fa/B0hEBdAb6npDlFL8Bwx4dfRLRqAo=
github.com/gkampitakis/go-diff v1.3.2 h1:Qyn0J9XJSDTgnsgHRdz9Zp24RaJeKMUHg2+PDZZdC4M=
github.com/gkampitakis/go-diff v1.3.2/go.mod h1:LLgOrpqleQe26cte8s36HTWcTmMEur6OPYerdAAS9tk=
github.com/gkampitakis/go-snaps v0.5.14 h1:3fAqdB6BCPKHDMHAKRwtPUwYexKtGrNuw8HX/T/4neo=
github.com/gkampitakis/go-snaps v0.5.14/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc=
github.com/gkampitakis/go-snaps v0.5.15 h1:amyJrvM1D33cPHwVrjo9jQxX8g/7E2wYdZ+01KS3zGE=
github.com/gkampitakis/go-snaps v0.5.15/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc=
github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
@@ -361,8 +357,8 @@ github.com/ipld/go-ipld-prime v0.21.0 h1:n4JmcpOlPDIxBcY037SVfpd1G+Sj1nKZah0m6QH
github.com/ipld/go-ipld-prime v0.21.0/go.mod h1:3RLqy//ERg/y5oShXXdx5YIp50cFGOanyMctpPjsvxQ=
github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7BdWus=
github.com/jackpal/go-nat-pmp v1.0.2/go.mod h1:QPH045xvCAeXUZOxsnwmrtiCoxIr9eob+4orBN1SBKc=
github.com/jaypipes/ghw v0.19.1 h1:Lhybk6aadgEJqIxeS0h07UOL/EgMGIdxbAy6V8J7RgY=
github.com/jaypipes/ghw v0.19.1/go.mod h1:GPrvwbtPoxYUenr74+nAnWbardIZq600vJDD5HnPsPE=
github.com/jaypipes/ghw v0.20.0 h1:8efvHHtyrj0P4qVZ9KE43iW9tMThKoh6dEOo38f3a4w=
github.com/jaypipes/ghw v0.20.0/go.mod h1:GPrvwbtPoxYUenr74+nAnWbardIZq600vJDD5HnPsPE=
github.com/jaypipes/pcidb v1.1.1 h1:QmPhpsbmmnCwZmHeYAATxEaoRuiMAJusKYkUncMC0ro=
github.com/jaypipes/pcidb v1.1.1/go.mod h1:x27LT2krrUgjf875KxQXKB0Ha/YXLdZRVmw6hH0G7g8=
github.com/jbenet/go-temp-err-catcher v0.1.0 h1:zpb3ZH6wIE8Shj2sKS+khgRvf7T7RABoLk/+KKHggpk=
@@ -389,8 +385,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
@@ -514,8 +510,8 @@ github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g
github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ=
github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc=
github.com/modelcontextprotocol/go-sdk v1.0.0 h1:Z4MSjLi38bTgLrd/LjSmofqRqyBiVKRyQSJgw8q8V74=
github.com/modelcontextprotocol/go-sdk v1.0.0/go.mod h1:nYtYQroQ2KQiM0/SbyEPUWQ6xs4B95gJjEalc9AQyOs=
github.com/modelcontextprotocol/go-sdk v1.1.0 h1:Qjayg53dnKC4UZ+792W21e4BpwEZBzwgRW6LrjLWSwA=
github.com/modelcontextprotocol/go-sdk v1.1.0/go.mod h1:6fM3LCm3yV7pAs8isnKLn07oKtB0MP9LHd3DfAcKw10=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -527,10 +523,10 @@ github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7P
github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
github.com/mudler/cogito v0.4.0 h1:CkdzbQplQW6LDUM6mTqupGFiQVluy0nx7xbYgAfBVKs=
github.com/mudler/cogito v0.4.0/go.mod h1:abMwl+CUjCp87IufA2quZdZt0bbLaHHN79o17HbUKxU=
github.com/mudler/edgevpn v0.31.0 h1:CXwxQ2ZygzE7iKGl1J+vq9pL5PvsW2uc3qI/zgpNpp4=
github.com/mudler/edgevpn v0.31.0/go.mod h1:DKgh9Wu/NM3UbZoPyheMXFvpu1dSLkXrqAOy3oKJN3I=
github.com/mudler/cogito v0.5.1 h1:KK9F7pNJUopewiZRsKnDEGu49+22fPrxWYxpoWQWN7s=
github.com/mudler/cogito v0.5.1/go.mod h1:2uhEElCTq8eXSsqJ1JF01oA5h9niXSELVKqCF1PqjEw=
github.com/mudler/edgevpn v0.31.1 h1:7qegiDWd0kAg6ljhNHxqvp8hbo/6BbzSdbb7/2WZfiY=
github.com/mudler/edgevpn v0.31.1/go.mod h1:ftV5B0nKFzm4R8vR80UYnCb2nf7lxCRgAALxUEEgCf8=
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82 h1:FVT07EI8njvsD4tC2Hw8Xhactp5AWhsQWD4oTeQuSAU=
@@ -571,16 +567,14 @@ github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8=
github.com/nicksnyder/go-i18n/v2 v2.5.1 h1:IxtPxYsR9Gp60cGXjfuR/llTqV8aYMsC472zD0D1vHk=
github.com/nicksnyder/go-i18n/v2 v2.5.1/go.mod h1:DrhgsSDZxoAfvVrBVLXoxZn/pN5TXqaDbq7ju94viiQ=
github.com/nikolalohinski/gonja/v2 v2.4.1 h1:eV/OB0FQ2v3LbQkcr3S+YJGsJV3AP3I83EvTaa5zwD0=
github.com/nikolalohinski/gonja/v2 v2.4.1/go.mod h1:UIzXPVuOsr5h7dZ5DUbqk3/Z7oFA/NLGQGMjqT4L2aU=
github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
github.com/onsi/ginkgo/v2 v2.26.0 h1:1J4Wut1IlYZNEAWIV3ALrT9NfiaGW2cDCJQSFQMs/gE=
github.com/onsi/ginkgo/v2 v2.26.0/go.mod h1:qhEywmzWTBUY88kfO0BRvX4py7scov9yR+Az2oavUzw=
github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns=
github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo=
github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
@@ -662,8 +656,6 @@ github.com/polydawn/refmt v0.89.0 h1:ADJTApkvkeBZsN0tBTx8QjpD9JkmxbKp0cxfr9qszm4
github.com/polydawn/refmt v0.89.0/go.mod h1:/zvteZs/GwLtCgZ4BL6CBsk9IKIlexP43ObX9AxTqTw=
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU=
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
@@ -711,8 +703,8 @@ github.com/schollz/progressbar/v3 v3.18.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8G
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/shirou/gopsutil/v3 v3.24.5 h1:i0t8kL+kQTvpAYToeuiVk3TgDeKOFioZO3Ztz/iZ9pI=
github.com/shirou/gopsutil/v3 v3.24.5/go.mod h1:bsoOS1aStSs9ErQ1WWfxllSeS1K5D+U30r2NfcubMVk=
github.com/shirou/gopsutil/v4 v4.25.5 h1:rtd9piuSMGeU8g1RMXjZs9y9luK5BwtnG7dZaQUJAsc=
github.com/shirou/gopsutil/v4 v4.25.5/go.mod h1:PfybzyydfZcN+JMMjkF6Zb8Mq1A/VcogFFg7hj50W9c=
github.com/shirou/gopsutil/v4 v4.25.6 h1:kLysI2JsKorfaFPcYmcJqbzROzsBWEOAtw6A7dIfqXs=
github.com/shirou/gopsutil/v4 v4.25.6/go.mod h1:PfybzyydfZcN+JMMjkF6Zb8Mq1A/VcogFFg7hj50W9c=
github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM=
github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ=
github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU=
@@ -787,8 +779,8 @@ github.com/swaggo/files/v2 v2.0.2/go.mod h1:TVqetIzZsO9OhHX1Am9sRf9LdrFZqoK49N37
github.com/swaggo/swag v1.16.6 h1:qBNcx53ZaX+M5dxVyTrgQ0PJ/ACK+NzhwcbieTt+9yI=
github.com/swaggo/swag v1.16.6/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4Xesg=
github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA=
github.com/testcontainers/testcontainers-go v0.38.0 h1:d7uEapLcv2P8AvH8ahLqDMMxda2W9gQN1nRbHS28HBw=
github.com/testcontainers/testcontainers-go v0.38.0/go.mod h1:C52c9MoHpWO+C4aqmgSU+hxlR5jlEayWtgYrb8Pzz1w=
github.com/testcontainers/testcontainers-go v0.40.0 h1:pSdJYLOVgLE8YdUY2FHQ1Fxu+aMnb6JfVz1mxk7OeMU=
github.com/testcontainers/testcontainers-go v0.40.0/go.mod h1:FSXV5KQtX2HAMlm7U3APNyLkkap35zNLxukw9oBi/MY=
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
@@ -813,10 +805,8 @@ github.com/urfave/cli v1.22.10/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60Nt
github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.55.0 h1:Zkefzgt6a7+bVKHnu/YaYSOPfNYNisSVBo/unVCf8k8=
github.com/valyala/fasthttp v1.55.0/go.mod h1:NkY9JtkrpPKmgwV3HTaS2HWaJss9RSIsRVfcxxoHiOM=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
github.com/valyala/fasthttp v1.68.0 h1:v12Nx16iepr8r9ySOwqI+5RBJ/DqTxhOy1HrHoDFnok=
github.com/valyala/fasthttp v1.68.0/go.mod h1:5EXiRfYQAoiO/khu4oU9VISC/eVY6JqmSpPJoHCKsz4=
github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck=
github.com/vbatts/tar-split v0.11.3/go.mod h1:9QlHN18E+fEH7RdG+QAJJcuya3rqT7eXSTY7wGrAokY=
github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU=
@@ -837,6 +827,8 @@ github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofm
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
@@ -876,8 +868,6 @@ go.opentelemetry.io/proto/otlp v1.8.0 h1:fRAZQDcAFHySxpJ1TwlA1cJ4tvcrw7nXl9xWWC8
go.opentelemetry.io/proto/otlp v1.8.0/go.mod h1:tIeYOeNBU4cvmPqpaji1P+KbB4Oloai8wN4rWzRrFF0=
go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
go.uber.org/dig v1.19.0 h1:BACLhebsYdpQ7IROQ1AGPjrXcP5dF80U3gKoFzbaq/4=
go.uber.org/dig v1.19.0/go.mod h1:Us0rSJiThwCv2GteUN0Q7OKvU7n5J4dxZ9JKUXozFdE=
go.uber.org/fx v1.24.0 h1:wE8mruvpg2kiiL1Vqd0CC+tr0/24XIB10Iwp2lLWzkg=
@@ -913,8 +903,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE=
golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw=
golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476 h1:bsqhLWFR6G6xiQcb+JoGqdKdRU6WzPWmK8E0jxTjzo4=
golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
@@ -932,8 +922,8 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ=
golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc=
golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U=
golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -957,12 +947,14 @@ golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -974,8 +966,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181029174526-d69651ed3497/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -1009,6 +1001,8 @@ golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053 h1:dHQOQddU4YHS5gY33/6klKjq7Gp3WwMyOXGNp5nzRj8=
golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053/go.mod h1:+nZKN+XVh4LCiA9DV3ywrzN4gumyCnKjau3NGb9SGoE=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
@@ -1016,8 +1010,8 @@ golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4=
golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw=
golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
@@ -1027,8 +1021,8 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
@@ -1050,8 +1044,8 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE=
golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

View File

@@ -79,6 +79,12 @@ func (f Functions) ToJSONStructure(name, args string) JSONFunctionStructure {
Type: "object",
Properties: property,
})
/*
js.AnyOf = append(js.OneOf, Item{
Type: "object",
Properties: property,
})
*/
}
return js
}

View File

@@ -13,99 +13,102 @@ import (
"github.com/rs/zerolog/log"
)
// @Description GrammarConfig contains configuration for grammar parsing
type GrammarConfig struct {
// ParallelCalls enables the LLM to return multiple function calls in the same response
ParallelCalls bool `yaml:"parallel_calls"`
ParallelCalls bool `yaml:"parallel_calls,omitempty" json:"parallel_calls,omitempty"`
DisableParallelNewLines bool `yaml:"disable_parallel_new_lines"`
DisableParallelNewLines bool `yaml:"disable_parallel_new_lines,omitempty" json:"disable_parallel_new_lines,omitempty"`
// MixedMode enables the LLM to return strings and not only JSON objects
// This is useful for models to not constraining returning only JSON and also messages back to the user
MixedMode bool `yaml:"mixed_mode"`
MixedMode bool `yaml:"mixed_mode,omitempty" json:"mixed_mode,omitempty"`
// NoMixedFreeString disables the mixed mode for free strings
// In this way if the LLM selects a free string, it won't be mixed necessarily with JSON objects.
// For example, if enabled the LLM or returns a JSON object or a free string, but not a mix of both
// If disabled(default): the LLM can return a JSON object surrounded by free strings (e.g. `this is the JSON result: { "bar": "baz" } for your question`). This forces the LLM to return at least a JSON object, but its not going to be strict
NoMixedFreeString bool `yaml:"no_mixed_free_string"`
NoMixedFreeString bool `yaml:"no_mixed_free_string,omitempty" json:"no_mixed_free_string,omitempty"`
// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
NoGrammar bool `yaml:"disable"`
NoGrammar bool `yaml:"disable,omitempty" json:"disable,omitempty"`
// Prefix is the suffix to append to the grammar when being generated
// This is useful when models prepend a tag before returning JSON
Prefix string `yaml:"prefix"`
Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
// ExpectStringsAfterJSON enables mixed string suffix
ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json"`
ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json,omitempty" json:"expect_strings_after_json,omitempty"`
// PropOrder selects what order to print properties
// for instance name,arguments will make print { "name": "foo", "arguments": { "bar": "baz" } }
// instead of { "arguments": { "bar": "baz" }, "name": "foo" }
PropOrder string `yaml:"properties_order"`
PropOrder string `yaml:"properties_order,omitempty" json:"properties_order,omitempty"`
// SchemaType can be configured to use a specific schema type to force the grammar
// available : json, llama3.1
SchemaType string `yaml:"schema_type"`
SchemaType string `yaml:"schema_type,omitempty" json:"schema_type,omitempty"`
GrammarTriggers []GrammarTrigger `yaml:"triggers"`
GrammarTriggers []GrammarTrigger `yaml:"triggers,omitempty" json:"triggers,omitempty"`
}
// @Description GrammarTrigger defines a trigger word for grammar parsing
type GrammarTrigger struct {
// Trigger is the string that triggers the grammar
Word string `yaml:"word"`
Word string `yaml:"word,omitempty" json:"word,omitempty"`
}
// FunctionsConfig is the configuration for the tool/function call.
// @Description FunctionsConfig is the configuration for the tool/function call.
// It includes setting to map the function name and arguments from the response
// and, for instance, also if processing the requests with BNF grammars.
type FunctionsConfig struct {
// DisableNoAction disables the "no action" tool
// By default we inject a tool that does nothing and is used to return an answer from the LLM
DisableNoAction bool `yaml:"disable_no_action"`
DisableNoAction bool `yaml:"disable_no_action,omitempty" json:"disable_no_action,omitempty"`
// Grammar is the configuration for the grammar
GrammarConfig GrammarConfig `yaml:"grammar"`
GrammarConfig GrammarConfig `yaml:"grammar,omitempty" json:"grammar,omitempty"`
// NoActionFunctionName is the name of the function that does nothing. It defaults to "answer"
NoActionFunctionName string `yaml:"no_action_function_name"`
NoActionFunctionName string `yaml:"no_action_function_name,omitempty" json:"no_action_function_name,omitempty"`
// NoActionDescriptionName is the name of the function that returns the description of the no action function
NoActionDescriptionName string `yaml:"no_action_description_name"`
NoActionDescriptionName string `yaml:"no_action_description_name,omitempty" json:"no_action_description_name,omitempty"`
// ResponseRegex is a named regex to extract the function name and arguments from the response
ResponseRegex []string `yaml:"response_regex"`
ResponseRegex []string `yaml:"response_regex,omitempty" json:"response_regex,omitempty"`
// JSONRegexMatch is a regex to extract the JSON object from the response
JSONRegexMatch []string `yaml:"json_regex_match"`
JSONRegexMatch []string `yaml:"json_regex_match,omitempty" json:"json_regex_match,omitempty"`
// ArgumentRegex is a named regex to extract the arguments from the response. Use ArgumentRegexKey and ArgumentRegexValue to set the names of the named regex for key and value of the arguments.
ArgumentRegex []string `yaml:"argument_regex"`
ArgumentRegex []string `yaml:"argument_regex,omitempty" json:"argument_regex,omitempty"`
// ArgumentRegex named regex names for key and value extractions. default: key and value
ArgumentRegexKey string `yaml:"argument_regex_key_name"` // default: key
ArgumentRegexValue string `yaml:"argument_regex_value_name"` // default: value
ArgumentRegexKey string `yaml:"argument_regex_key_name,omitempty" json:"argument_regex_key_name,omitempty"` // default: key
ArgumentRegexValue string `yaml:"argument_regex_value_name,omitempty" json:"argument_regex_value_name,omitempty"` // default: value
// ReplaceFunctionResults allow to replace strings in the results before parsing them
ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results"`
ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results,omitempty" json:"replace_function_results,omitempty"`
// ReplaceLLMResult allow to replace strings in the results before parsing them
ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results"`
ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results,omitempty" json:"replace_llm_results,omitempty"`
// CaptureLLMResult is a regex to extract a string from the LLM response
// that is used as return string when using tools.
// This is useful for e.g. if the LLM outputs a reasoning and we want to get the reasoning as a string back
CaptureLLMResult []string `yaml:"capture_llm_results"`
CaptureLLMResult []string `yaml:"capture_llm_results,omitempty" json:"capture_llm_results,omitempty"`
// FunctionName enable the LLM to return { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
// instead of { "function": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }.
// This might be useful for certain models trained with the function name as the first token.
FunctionNameKey string `yaml:"function_name_key"`
FunctionArgumentsKey string `yaml:"function_arguments_key"`
FunctionNameKey string `yaml:"function_name_key,omitempty" json:"function_name_key,omitempty"`
FunctionArgumentsKey string `yaml:"function_arguments_key,omitempty" json:"function_arguments_key,omitempty"`
}
// @Description ReplaceResult defines a key-value replacement for function results
type ReplaceResult struct {
Key string `yaml:"key"`
Value string `yaml:"value"`
Key string `yaml:"key,omitempty" json:"key,omitempty"`
Value string `yaml:"value,omitempty" json:"value,omitempty"`
}
type FuncCallResults struct {

View File

@@ -178,11 +178,22 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun
}
for {
// Check if context is cancelled before receiving
select {
case <-ctx.Done():
return ctx.Err()
default:
}
reply, err := stream.Recv()
if err == io.EOF {
break
}
if err != nil {
// Check if error is due to context cancellation
if ctx.Err() != nil {
return ctx.Err()
}
fmt.Println("Error", err)
return err

View File

@@ -51,6 +51,7 @@ type ModelFile struct {
Size int64
SHA256 string
IsReadme bool
URL string
}
// ModelDetails represents detailed information about a model
@@ -215,6 +216,7 @@ func (c *Client) GetModelDetails(repoID string) (*ModelDetails, error) {
}
// Process each file
baseURL := strings.TrimSuffix(c.baseURL, "/api/models")
for _, file := range files {
fileName := filepath.Base(file.Path)
isReadme := strings.Contains(strings.ToLower(fileName), "readme")
@@ -227,11 +229,16 @@ func (c *Client) GetModelDetails(repoID string) (*ModelDetails, error) {
sha256 = file.Oid
}
// Construct the full URL for the file
// Use /resolve/main/ for downloading files (handles LFS properly)
fileURL := fmt.Sprintf("%s/%s/resolve/main/%s", baseURL, repoID, file.Path)
modelFile := ModelFile{
Path: file.Path,
Size: file.Size,
SHA256: sha256,
IsReadme: isReadme,
URL: fileURL,
}
details.Files = append(details.Files, modelFile)

View File

@@ -1,6 +1,7 @@
package hfapi_test
import (
"fmt"
"net/http"
"net/http/httptest"
"strings"
@@ -8,7 +9,7 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
)
var _ = Describe("HuggingFace API Client", func() {
@@ -270,6 +271,15 @@ var _ = Describe("HuggingFace API Client", func() {
})
})
Context("when getting file SHA on remote model", func() {
It("should get file SHA successfully", func() {
sha, err := client.GetFileSHA(
"mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF", "localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf")
Expect(err).ToNot(HaveOccurred())
Expect(sha).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4"))
})
})
Context("when listing files", func() {
BeforeEach(func() {
mockFilesResponse := `[
@@ -329,23 +339,25 @@ var _ = Describe("HuggingFace API Client", func() {
Context("when getting file SHA", func() {
BeforeEach(func() {
mockFileInfoResponse := `{
"path": "model-Q4_K_M.gguf",
"size": 1000000,
"oid": "abc123",
"lfs": {
"oid": "sha256:def456",
mockFilesResponse := `[
{
"type": "file",
"path": "model-Q4_K_M.gguf",
"size": 1000000,
"pointer": "version https://git-lfs.github.com/spec/v1",
"sha256": "def456789"
"oid": "abc123",
"lfs": {
"oid": "def456789",
"size": 1000000,
"pointerSize": 135
}
}
}`
]`
server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if strings.Contains(r.URL.Path, "/paths-info") {
if strings.Contains(r.URL.Path, "/tree/main") {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
w.Write([]byte(mockFileInfoResponse))
w.Write([]byte(mockFilesResponse))
} else {
w.WriteHeader(http.StatusNotFound)
}
@@ -363,18 +375,29 @@ var _ = Describe("HuggingFace API Client", func() {
It("should handle missing SHA gracefully", func() {
server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
w.Write([]byte(`{"path": "file.txt", "size": 100}`))
if strings.Contains(r.URL.Path, "/tree/main") {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
w.Write([]byte(`[
{
"type": "file",
"path": "file.txt",
"size": 100,
"oid": "file123"
}
]`))
} else {
w.WriteHeader(http.StatusNotFound)
}
}))
client.SetBaseURL(server.URL)
sha, err := client.GetFileSHA("test/model", "file.txt")
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("no SHA256 found"))
Expect(sha).To(Equal(""))
Expect(err).ToNot(HaveOccurred())
// When there's no LFS, it should return the OID
Expect(sha).To(Equal("file123"))
})
})
@@ -439,6 +462,13 @@ var _ = Describe("HuggingFace API Client", func() {
Expect(details.ReadmeFile).ToNot(BeNil())
Expect(details.ReadmeFile.Path).To(Equal("README.md"))
Expect(details.ReadmeFile.IsReadme).To(BeTrue())
// Verify URLs are set for all files
baseURL := strings.TrimSuffix(server.URL, "/api/models")
for _, file := range details.Files {
expectedURL := fmt.Sprintf("%s/test/model/resolve/main/%s", baseURL, file.Path)
Expect(file.URL).To(Equal(expectedURL))
}
})
})

View File

@@ -11,3 +11,5 @@ func TestHfapi(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "HuggingFace API Suite")
}