mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-27 09:57:14 -04:00
Compare commits
4 Commits
v4.5.3
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0de0f8fcbb | ||
|
|
e95018ef70 | ||
|
|
0258f8af55 | ||
|
|
14b29ebf4e |
@@ -1,5 +1,5 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm7.0
|
||||
torch==2.10.0+rocm7.0
|
||||
torch==2.12.0+cpu
|
||||
torchaudio
|
||||
torchvision
|
||||
|
||||
|
||||
@@ -7,3 +7,7 @@ setuptools
|
||||
six
|
||||
scipy
|
||||
numpy
|
||||
# fish-speech is installed editable with --no-build-isolation, so the build
|
||||
# backends of its transitive deps must already be in the venv. One of them
|
||||
# builds a Rust extension and needs setuptools-rust present at metadata time.
|
||||
setuptools-rust
|
||||
|
||||
@@ -11,14 +11,31 @@ fi
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade "
|
||||
installRequirements
|
||||
|
||||
# Fetch convert_hf_to_gguf.py from llama.cpp
|
||||
# Fetch convert_hf_to_gguf.py from llama.cpp.
|
||||
# Upstream split the model-specific logic out of the single file into a
|
||||
# sibling `conversion/` package (convert_hf_to_gguf.py now does
|
||||
# `from conversion import ...`), so a single-file download no longer runs —
|
||||
# it fails with `ModuleNotFoundError: No module named 'conversion'`. We clone
|
||||
# the repo and copy both the script and the package; Python puts the script's
|
||||
# own directory on sys.path[0], so the package resolves when placed beside it.
|
||||
LLAMA_CPP_CONVERT_VERSION="${LLAMA_CPP_CONVERT_VERSION:-master}"
|
||||
LLAMA_CPP_SRC="${EDIR}/llama.cpp"
|
||||
CONVERT_SCRIPT="${EDIR}/convert_hf_to_gguf.py"
|
||||
if [ ! -f "${CONVERT_SCRIPT}" ]; then
|
||||
echo "Downloading convert_hf_to_gguf.py from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
||||
curl -L --fail --retry 3 \
|
||||
"https://raw.githubusercontent.com/ggml-org/llama.cpp/${LLAMA_CPP_CONVERT_VERSION}/convert_hf_to_gguf.py" \
|
||||
-o "${CONVERT_SCRIPT}" || echo "Warning: Failed to download convert_hf_to_gguf.py."
|
||||
|
||||
cloneLlamaCpp() {
|
||||
if [ ! -d "${LLAMA_CPP_SRC}/.git" ]; then
|
||||
git clone --depth 1 --branch "${LLAMA_CPP_CONVERT_VERSION}" \
|
||||
https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}" 2>/dev/null || \
|
||||
git clone --depth 1 https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}"
|
||||
fi
|
||||
}
|
||||
|
||||
if [ ! -f "${CONVERT_SCRIPT}" ] || [ ! -d "${EDIR}/conversion" ]; then
|
||||
echo "Fetching convert_hf_to_gguf.py + conversion/ from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
||||
cloneLlamaCpp
|
||||
cp "${LLAMA_CPP_SRC}/convert_hf_to_gguf.py" "${CONVERT_SCRIPT}"
|
||||
rm -rf "${EDIR}/conversion"
|
||||
cp -r "${LLAMA_CPP_SRC}/conversion" "${EDIR}/conversion"
|
||||
fi
|
||||
|
||||
# Install gguf package from the same llama.cpp commit to keep them in sync
|
||||
@@ -41,12 +58,7 @@ QUANTIZE_BIN="${EDIR}/llama-quantize"
|
||||
if [ ! -x "${QUANTIZE_BIN}" ] && ! command -v llama-quantize &>/dev/null; then
|
||||
if command -v cmake &>/dev/null; then
|
||||
echo "Building llama-quantize from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
||||
LLAMA_CPP_SRC="${EDIR}/llama.cpp"
|
||||
if [ ! -d "${LLAMA_CPP_SRC}" ]; then
|
||||
git clone --depth 1 --branch "${LLAMA_CPP_CONVERT_VERSION}" \
|
||||
https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}" 2>/dev/null || \
|
||||
git clone --depth 1 https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}"
|
||||
fi
|
||||
cloneLlamaCpp # reuses the clone fetched for convert_hf_to_gguf.py
|
||||
cmake -B "${LLAMA_CPP_SRC}/build" -S "${LLAMA_CPP_SRC}" -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF
|
||||
cmake --build "${LLAMA_CPP_SRC}/build" --target llama-quantize -j"$(nproc 2>/dev/null || echo 2)"
|
||||
cp "${LLAMA_CPP_SRC}/build/bin/llama-quantize" "${QUANTIZE_BIN}"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
torch==2.10.0
|
||||
torch==2.12.0+cpu
|
||||
transformers>=4.56.2
|
||||
huggingface-hub>=1.3.0
|
||||
sentencepiece
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
torch==2.10.0
|
||||
torch==2.12.0+cpu
|
||||
transformers>=4.56.2
|
||||
huggingface-hub>=1.3.0
|
||||
sentencepiece
|
||||
|
||||
@@ -85,9 +85,15 @@ if [ "x${BUILD_TYPE}" == "x" ] || [ "x${FROM_SOURCE:-}" == "xtrue" ]; then
|
||||
# The resulting binary still requires an AVX-512 capable CPU at runtime,
|
||||
# same constraint sglang upstream documents in docker/xeon.Dockerfile.
|
||||
|
||||
# Pin the source build to the same release the GPU path floors on
|
||||
# (0.5.11, see requirements-cublas12-after.txt). An unpinned master clone
|
||||
# pulls in newer CPU kernels (e.g. mamba/fla.cpp) that fail to compile
|
||||
# (constexpr non-constant + kineto_LIBRARY-NOTFOUND). Bump deliberately.
|
||||
SGLANG_VERSION="${SGLANG_VERSION:-v0.5.11}"
|
||||
_sgl_src=$(mktemp -d)
|
||||
trap 'rm -rf "${_sgl_src}"' EXIT
|
||||
git clone --depth 1 https://github.com/sgl-project/sglang "${_sgl_src}/sglang"
|
||||
git clone --depth 1 --branch "${SGLANG_VERSION}" \
|
||||
https://github.com/sgl-project/sglang "${_sgl_src}/sglang"
|
||||
|
||||
# Patch -march=native → -march=sapphirerapids in the CPU kernel CMakeLists
|
||||
sed -i 's/-march=native/-march=sapphirerapids/g' \
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
accelerate
|
||||
torch==2.9.0
|
||||
torch==2.12.0+cpu
|
||||
torchvision
|
||||
torchaudio
|
||||
transformers
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
# for cublas12 so uv consults this index alongside PyPI.
|
||||
--extra-index-url https://download.pytorch.org/whl/cu128
|
||||
accelerate
|
||||
torch==2.9.1
|
||||
torch==2.12.0+cpu
|
||||
torchvision
|
||||
torchaudio
|
||||
transformers
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
accelerate
|
||||
torch==2.7.0
|
||||
torch==2.12.0+cu130
|
||||
transformers
|
||||
bitsandbytes
|
||||
|
||||
@@ -570,6 +570,43 @@ impl Backend for KokorosService {
|
||||
) -> Result<Response<backend::Result>, Status> {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
|
||||
async fn sound_detection(
|
||||
&self,
|
||||
_: Request<backend::SoundDetectionRequest>,
|
||||
) -> Result<Response<backend::SoundDetectionResponse>, Status> {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
|
||||
async fn depth(
|
||||
&self,
|
||||
_: Request<backend::DepthRequest>,
|
||||
) -> Result<Response<backend::DepthResponse>, Status> {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
|
||||
async fn token_classify(
|
||||
&self,
|
||||
_: Request<backend::TokenClassifyRequest>,
|
||||
) -> Result<Response<backend::TokenClassifyResponse>, Status> {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
|
||||
async fn score(
|
||||
&self,
|
||||
_: Request<backend::ScoreRequest>,
|
||||
) -> Result<Response<backend::ScoreResponse>, Status> {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
|
||||
type ForwardStream = ReceiverStream<Result<backend::ForwardReply, Status>>;
|
||||
|
||||
async fn forward(
|
||||
&self,
|
||||
_: Request<tonic::Streaming<backend::ForwardRequest>>,
|
||||
) -> Result<Response<Self::ForwardStream>, Status> {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -1,4 +1,52 @@
|
||||
---
|
||||
- name: "qwen-agentworld-35b-a3b"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen-AgentWorld-35B-A3B-GGUF
|
||||
description: |
|
||||
# Qwen-AgentWorld-35B-A3B
|
||||
|
||||
📑 Technical Report |
|
||||
📖 Blog |
|
||||
🤗 Hugging Face |
|
||||
🤖 ModelScope |
|
||||
💻 GitHub |
|
||||
🖥️ Demo
|
||||
|
||||
> [!Note]
|
||||
> This repository contains the model weights and configuration files for **Qwen-AgentWorld-35B-A3B**, a native language world model trained for agentic environment simulation.
|
||||
>
|
||||
> These artifacts are compatible with Hugging Face Transformers, vLLM, SGLang, etc.
|
||||
|
||||
**Qwen-AgentWorld** is the first language world model to cover seven agent interaction domains within a single model. It simulates agentic environments via long chain-of-thought reasoning, predicting the next environment state given an agent's action and interaction history. Trained through a three-stage pipeline — CPT injects environment knowledge, SFT activates next-state-prediction reasoning, RL sharpens simulation fidelity — Qwen-AgentWorld is a **native world model**: environment modeling is the training objective from the CPT stage onward, not a post-hoc add-on.
|
||||
|
||||
## Highlights
|
||||
|
||||
...
|
||||
license: "apache-2.0"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
- qwen
|
||||
icon: https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen-AgentWorld/logo.png
|
||||
overrides:
|
||||
backend: llama-cpp
|
||||
function:
|
||||
automatic_tool_parsing_fallback: true
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
options:
|
||||
- use_jinja:true
|
||||
parameters:
|
||||
model: llama-cpp/models/Qwen-AgentWorld-35B-A3B-GGUF/Qwen-AgentWorld-35B-A3B-UD-Q4_K_M.gguf
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Qwen-AgentWorld-35B-A3B-GGUF/Qwen-AgentWorld-35B-A3B-UD-Q4_K_M.gguf
|
||||
sha256: e7a8eafdd8013443b6bcc4b6fb47b2d2025f772d359650b9ceb7d75971e22cad
|
||||
uri: https://huggingface.co/unsloth/Qwen-AgentWorld-35B-A3B-GGUF/resolve/main/Qwen-AgentWorld-35B-A3B-UD-Q4_K_M.gguf
|
||||
- name: "ornith-1.0-9b"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
|
||||
@@ -17,9 +17,15 @@ rm -rf "${BACKEND_DIR}"/build-*
|
||||
# run.sh's final `exec $CURDIR/<binary>` is the contract for what gets launched;
|
||||
# the binary is not always named after the backend (e.g. parakeet-cpp launches
|
||||
# parakeet-cpp-grpc), so derive it from run.sh and fall back to ${BACKEND}.
|
||||
#
|
||||
# Only scan the `exec` line(s): many run.sh select a runtime CPU variant via
|
||||
# unquoted `LIBRARY=$CURDIR/libgo<x>-avx512.so` lines, and a whole-file grep
|
||||
# would pick the last of those (avx512, which Darwin never builds) instead of
|
||||
# the binary — failing the check below for whisper/sam3-cpp/vibevoice-cpp/...
|
||||
# Also tolerate the exec being quoted (`exec "$CURDIR"/<binary>`).
|
||||
RUN_BINARY=""
|
||||
if [ -f "${BACKEND_DIR}/run.sh" ]; then
|
||||
RUN_BINARY=$(grep -oE '\$CURDIR/[A-Za-z0-9._-]+' "${BACKEND_DIR}/run.sh" | grep -v 'ld\.so' | tail -1 | sed 's|\$CURDIR/||')
|
||||
RUN_BINARY=$(grep -E '^[[:space:]]*exec[[:space:]]' "${BACKEND_DIR}/run.sh" | grep -oE '"?\$CURDIR"?/[A-Za-z0-9._-]+' | grep -v 'ld\.so' | tail -1 | sed -E 's|"?\$CURDIR"?/||')
|
||||
fi
|
||||
RUN_BINARY="${RUN_BINARY:-${BACKEND}}"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user