mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-03 04:46:54 -04:00
Compare commits
1 Commits
fix/mlx-to
...
worktree-i
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d0e6bf3aa7 |
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=87fc8701ff4da81a7d2a91ec0695f95eb3066a47
|
||||
IK_LLAMA_VERSION?=068b173649f2fd8dc96b35ada5a0b76d8985105d
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=fdb1db877c526ec90f668eca1b858da5dba85560
|
||||
LLAMA_VERSION?=4fc4ec5541b243957ae5099edb67372f8f3b550e
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
# Local development: point at a working checkout instead of cloning, e.g.
|
||||
# make PRIVACY_FILTER_SRC=$HOME/c/privacy-filter.cpp grpc-server
|
||||
|
||||
PRIVACY_FILTER_VERSION?=735a6c28607ee82afc3a670383f41b55266a3b9a
|
||||
PRIVACY_FILTER_VERSION?=595f59630c69d361b5196f2aba2c71c873d0c13c
|
||||
PRIVACY_FILTER_REPO?=https://github.com/localai-org/privacy-filter.cpp
|
||||
PRIVACY_FILTER_SRC?=
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# CrispASR version (release tag)
|
||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||
CRISPASR_VERSION?=9a26976a8c8cf5af0afcdd04463cf8ba91e96a54
|
||||
CRISPASR_VERSION?=fcbc8718e654995e3bd2d0c98bcb8e55e297d23c
|
||||
SO_TARGET?=libgocrispasr.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=2574f5936571645f784b77623e1f09bad97d948a
|
||||
STABLEDIFFUSION_GGML_VERSION?=3590aa8d626e671a1b1dc84506ea2932a243a480
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
@@ -20,15 +20,7 @@ def split_reasoning(text, think_start, think_end):
|
||||
Returns ``(reasoning_content, remaining_text)``. When ``think_start`` is
|
||||
empty or not found, returns ``("", text)`` unchanged.
|
||||
"""
|
||||
if not think_start or not text:
|
||||
return "", text
|
||||
if think_start not in text:
|
||||
# Models like Qwen3.5 open assistant turns already INSIDE thinking, so
|
||||
# the generated text carries only the closing tag. Everything before it
|
||||
# is reasoning that would otherwise leak into the content.
|
||||
if think_end and think_end in text:
|
||||
head, _, tail = text.partition(think_end)
|
||||
return head.strip(), tail.strip()
|
||||
if not think_start or not text or think_start not in text:
|
||||
return "", text
|
||||
pattern = re.compile(
|
||||
re.escape(think_start) + r"(.*?)" + re.escape(think_end or ""),
|
||||
|
||||
@@ -1,75 +0,0 @@
|
||||
"""Unit tests for the mlx/mlx-vlm shared helpers (mlx_utils.py).
|
||||
|
||||
Run standalone (Python standard library only, no backend venv needed):
|
||||
python3 -m unittest mlx_utils_test
|
||||
|
||||
These mirror the server-less helper tests in backend/python/mlx/test.py
|
||||
(TestSharedHelpers), but live here so they run on any platform: the mlx
|
||||
test module imports grpc/backend_pb2 at import time and needs the MLX venv,
|
||||
whereas mlx_utils only needs the standard library.
|
||||
"""
|
||||
|
||||
import types
|
||||
import unittest
|
||||
|
||||
from mlx_utils import parse_tool_calls, split_reasoning
|
||||
|
||||
|
||||
class TestSplitReasoning(unittest.TestCase):
|
||||
def test_both_tags(self):
|
||||
r, c = split_reasoning(
|
||||
"<think>step 1\nstep 2</think>The answer is 42.", "<think>", "</think>"
|
||||
)
|
||||
self.assertEqual(r, "step 1\nstep 2")
|
||||
self.assertEqual(c, "The answer is 42.")
|
||||
|
||||
def test_implicit_opener_only_closing_tag(self):
|
||||
# Qwen3.5 opens the assistant turn already inside thinking, so the
|
||||
# output carries only the closing tag; everything before it is reasoning.
|
||||
r, c = split_reasoning(
|
||||
"The user is asking about the weather.\n</think>\n\nThe weather in Rome is sunny.",
|
||||
"<think>",
|
||||
"</think>",
|
||||
)
|
||||
self.assertEqual(r, "The user is asking about the weather.")
|
||||
self.assertEqual(c, "The weather in Rome is sunny.")
|
||||
|
||||
def test_no_tags_at_all(self):
|
||||
r, c = split_reasoning("just text", "<think>", "</think>")
|
||||
self.assertEqual(r, "")
|
||||
self.assertEqual(c, "just text")
|
||||
|
||||
def test_empty_think_end_and_no_opener_match(self):
|
||||
# No think_end to anchor on, and the opener is absent → return unchanged.
|
||||
r, c = split_reasoning("no opener here", "<think>", "")
|
||||
self.assertEqual(r, "")
|
||||
self.assertEqual(c, "no opener here")
|
||||
|
||||
def test_empty_text(self):
|
||||
r, c = split_reasoning("", "<think>", "</think>")
|
||||
self.assertEqual(r, "")
|
||||
self.assertEqual(c, "")
|
||||
|
||||
|
||||
class TestParseToolCalls(unittest.TestCase):
|
||||
def test_with_shim(self):
|
||||
tm = types.SimpleNamespace(
|
||||
tool_call_start="<tool_call>",
|
||||
tool_call_end="</tool_call>",
|
||||
parse_tool_call=lambda body, tools: {
|
||||
"name": "get_weather",
|
||||
"arguments": {"location": body.strip()},
|
||||
},
|
||||
)
|
||||
calls, remaining = parse_tool_calls(
|
||||
"Sure: <tool_call>Paris</tool_call>", tm, tools=None
|
||||
)
|
||||
self.assertEqual(len(calls), 1)
|
||||
self.assertEqual(calls[0]["name"], "get_weather")
|
||||
self.assertEqual(calls[0]["arguments"], '{"location": "Paris"}')
|
||||
self.assertEqual(calls[0]["index"], 0)
|
||||
self.assertNotIn("<tool_call>", remaining)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -58,18 +58,7 @@ def messages_to_dicts(proto_messages):
|
||||
d["reasoning_content"] = msg.reasoning_content
|
||||
if msg.tool_calls:
|
||||
try:
|
||||
tool_calls = json.loads(msg.tool_calls)
|
||||
# Chat templates (e.g. Qwen) iterate function.arguments as a
|
||||
# mapping, but the OpenAI wire format carries it as a JSON
|
||||
# string — decode it back so the template's .items() works.
|
||||
for tc in tool_calls:
|
||||
fn = tc.get("function") if isinstance(tc, dict) else None
|
||||
if isinstance(fn, dict) and isinstance(fn.get("arguments"), str):
|
||||
try:
|
||||
fn["arguments"] = json.loads(fn["arguments"])
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
d["tool_calls"] = tool_calls
|
||||
d["tool_calls"] = json.loads(msg.tool_calls)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
result.append(d)
|
||||
|
||||
@@ -1,122 +0,0 @@
|
||||
"""Unit tests for the shared python backend helpers (python_utils.py).
|
||||
|
||||
Run standalone (Python standard library only, no backend venv needed):
|
||||
python3 -m unittest python_utils_test
|
||||
|
||||
These mirror the server-less helper tests in backend/python/mlx/test.py
|
||||
(TestSharedHelpers), but live here so they run on any platform: the mlx
|
||||
test module imports grpc/backend_pb2 at import time and needs the MLX venv,
|
||||
whereas python_utils has no third-party dependency. Proto Message objects
|
||||
are faked with types.SimpleNamespace (real proto fields default to "").
|
||||
"""
|
||||
|
||||
import json
|
||||
import types
|
||||
import unittest
|
||||
|
||||
from python_utils import messages_to_dicts, parse_options
|
||||
|
||||
|
||||
def _msg(**fields):
|
||||
"""Fake a proto Message: every unset field is the empty string, as protobuf."""
|
||||
defaults = {
|
||||
"role": "",
|
||||
"content": "",
|
||||
"name": "",
|
||||
"tool_call_id": "",
|
||||
"reasoning_content": "",
|
||||
"tool_calls": "",
|
||||
}
|
||||
defaults.update(fields)
|
||||
return types.SimpleNamespace(**defaults)
|
||||
|
||||
|
||||
class TestParseOptions(unittest.TestCase):
|
||||
def test_type_inference(self):
|
||||
opts = parse_options(
|
||||
["temperature:0.7", "max_tokens:128", "trust:true", "name:hello", "no_colon_skipped"]
|
||||
)
|
||||
self.assertEqual(opts["temperature"], 0.7)
|
||||
self.assertEqual(opts["max_tokens"], 128)
|
||||
self.assertIs(opts["trust"], True)
|
||||
self.assertEqual(opts["name"], "hello")
|
||||
self.assertNotIn("no_colon_skipped", opts)
|
||||
|
||||
|
||||
class TestMessagesToDicts(unittest.TestCase):
|
||||
def test_basic_fields(self):
|
||||
out = messages_to_dicts(
|
||||
[
|
||||
_msg(role="user", content="hi"),
|
||||
_msg(role="tool", content="42", tool_call_id="call_1", name="f"),
|
||||
]
|
||||
)
|
||||
self.assertEqual(out[0], {"role": "user", "content": "hi"})
|
||||
self.assertEqual(out[1]["tool_call_id"], "call_1")
|
||||
self.assertEqual(out[1]["name"], "f")
|
||||
|
||||
def test_tool_call_arguments_string_decoded_to_mapping(self):
|
||||
# OpenAI wire format ships function.arguments as a JSON *string*; chat
|
||||
# templates iterate it as a mapping, so it must come back as a dict.
|
||||
out = messages_to_dicts(
|
||||
[
|
||||
_msg(
|
||||
role="assistant",
|
||||
tool_calls=json.dumps(
|
||||
[
|
||||
{
|
||||
"id": "call_1",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"arguments": '{"location": "Rome"}',
|
||||
},
|
||||
}
|
||||
]
|
||||
),
|
||||
)
|
||||
]
|
||||
)
|
||||
args = out[0]["tool_calls"][0]["function"]["arguments"]
|
||||
self.assertEqual(args, {"location": "Rome"})
|
||||
self.assertEqual(dict(args.items()), {"location": "Rome"})
|
||||
|
||||
def test_tool_call_arguments_already_mapping_is_idempotent(self):
|
||||
out = messages_to_dicts(
|
||||
[
|
||||
_msg(
|
||||
role="assistant",
|
||||
tool_calls=json.dumps(
|
||||
[{"function": {"name": "f", "arguments": {"a": 1}}}]
|
||||
),
|
||||
)
|
||||
]
|
||||
)
|
||||
self.assertEqual(out[0]["tool_calls"][0]["function"]["arguments"], {"a": 1})
|
||||
|
||||
def test_tool_call_arguments_invalid_json_left_as_string(self):
|
||||
out = messages_to_dicts(
|
||||
[
|
||||
_msg(
|
||||
role="assistant",
|
||||
tool_calls=json.dumps(
|
||||
[{"function": {"name": "f", "arguments": "not-json"}}]
|
||||
),
|
||||
)
|
||||
]
|
||||
)
|
||||
self.assertEqual(out[0]["tool_calls"][0]["function"]["arguments"], "not-json")
|
||||
|
||||
def test_tool_call_without_function_key(self):
|
||||
out = messages_to_dicts(
|
||||
[_msg(role="assistant", tool_calls=json.dumps([{"id": "call_1"}]))]
|
||||
)
|
||||
self.assertEqual(out[0]["tool_calls"], [{"id": "call_1"}])
|
||||
|
||||
def test_tool_calls_invalid_json_dropped(self):
|
||||
out = messages_to_dicts([_msg(role="assistant", tool_calls="{not json")])
|
||||
self.assertNotIn("tool_calls", out[0])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -35,21 +35,6 @@ if [ "x${BUILD_PROFILE}" == "xcpu" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-best-match"
|
||||
fi
|
||||
|
||||
# AMD ROCm: vLLM ships prebuilt ROCm wheels, but on a DEDICATED index
|
||||
# (https://wheels.vllm.ai/rocm/), NOT PyPI, and ONLY for CPython 3.12. On any
|
||||
# other Python the installer silently falls back to the CUDA-only PyPI wheel,
|
||||
# which is unusable on an AMD GPU (import fails, so the backend never finds the
|
||||
# vllm module). Force Python 3.12 before the venv is created (matches the
|
||||
# intel/l4t13 cp312 bump); the hipblas branch below pulls vllm from the ROCm
|
||||
# wheel index. unsafe-best-match lets uv consult that index and PyPI together.
|
||||
# https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm
|
||||
if [ "x${BUILD_TYPE}" == "xhipblas" ]; then
|
||||
PYTHON_VERSION="3.12"
|
||||
PYTHON_PATCH="12"
|
||||
PY_STANDALONE_TAG="20251120"
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-best-match"
|
||||
fi
|
||||
|
||||
# cublas13 pulls the vLLM wheel from a per-tag cu130 index (PyPI's vllm wheel
|
||||
# is built against CUDA 12 and won't load on cu130). uv's default per-package
|
||||
# first-match strategy would still pick the PyPI wheel, so allow it to consult
|
||||
@@ -119,7 +104,7 @@ if [ "$(uname -s)" = "Darwin" ]; then
|
||||
# can rewrite it. Darwin therefore follows vllm-metal and can lag the Linux
|
||||
# vllm pin (requirements-cublas13-after.txt, bumped independently against
|
||||
# vllm/vllm) until vllm-metal supports a newer vLLM.
|
||||
VLLM_METAL_VERSION="v0.3.0.dev20260701212152"
|
||||
VLLM_METAL_VERSION="v0.3.0.dev20260701132215"
|
||||
|
||||
# The coupled vLLM source version is whatever this vllm-metal release builds
|
||||
# against -- it declares it in its own installer as `vllm_v=`. Derive it from
|
||||
@@ -209,22 +194,6 @@ elif [ "x${BUILD_TYPE}" == "xintel" ]; then
|
||||
export CMAKE_PREFIX_PATH="$(python -c 'import site; print(site.getsitepackages()[0])'):${CMAKE_PREFIX_PATH:-}"
|
||||
VLLM_TARGET_DEVICE=xpu uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --no-deps .
|
||||
popd
|
||||
# AMD ROCm: install vllm from its dedicated ROCm wheel index instead of the
|
||||
# CUDA-only PyPI wheel. installRequirements brings the base ROCm
|
||||
# torch/transformers (requirements-hipblas.txt), then we pull vllm (plus the
|
||||
# matching ROCm torch, via --upgrade) from wheels.vllm.ai/rocm. This is the
|
||||
# method upstream prescribes for AMD; the Python-3.12 pin is set above.
|
||||
# There is intentionally no requirements-hipblas-after.txt: a bare `vllm`
|
||||
# there would resolve to the CUDA wheel, and installRequirements never loads
|
||||
# a ${BUILD_TYPE}-after file for hipblas anyway (BUILD_TYPE == BUILD_PROFILE).
|
||||
# https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm
|
||||
elif [ "x${BUILD_TYPE}" == "xhipblas" ]; then
|
||||
installRequirements
|
||||
|
||||
# --upgrade reconciles the base ROCm torch to whatever the vllm ROCm wheel
|
||||
# pins; --extra-index-url adds the ROCm wheel repository on top of PyPI.
|
||||
uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} \
|
||||
--extra-index-url https://wheels.vllm.ai/rocm/ --upgrade vllm
|
||||
# FROM_SOURCE=true on a CPU build skips the prebuilt vllm wheel in
|
||||
# requirements-cpu-after.txt and compiles vllm locally against the host's
|
||||
# actual CPU. Not used by default because it takes ~30-40 minutes, but
|
||||
|
||||
1
backend/python/vllm/requirements-hipblas-after.txt
Normal file
1
backend/python/vllm/requirements-hipblas-after.txt
Normal file
@@ -0,0 +1 @@
|
||||
vllm
|
||||
@@ -157,6 +157,33 @@ var _ = Describe("X-LocalAI-Node ctx propagation contract", func() {
|
||||
stampViaRouterCtx()
|
||||
})
|
||||
|
||||
// Regression for #10636: a canceled request context must NOT cancel the
|
||||
// model LOAD. The heavy image/audio backends bind the load to the request
|
||||
// context so the routing holder reaches the SmartRouter; but a large
|
||||
// diffusers/LLM model on a slow (e.g. shared-memory iGPU) host can take
|
||||
// far longer to load than the client stays connected. If the request's
|
||||
// cancellation propagates to the load, the LoadModel RPC is aborted, the
|
||||
// backend process is torn down, and every retry restarts from scratch and
|
||||
// never converges. The load must instead run to completion and cache while
|
||||
// still carrying the request's routing holder value.
|
||||
It("ImageGeneration does not propagate request cancellation to the model load", func() {
|
||||
canceledCtx, cancel := context.WithCancel(reqCtx)
|
||||
cancel() // client disconnected while the (slow) load was still running
|
||||
|
||||
_, err := backend.ImageGeneration(canceledCtx, 64, 64, 1, 0, "p", "", "", "/tmp/out.png", loader, modelCfg, appCfg, nil)
|
||||
// The load reached the router (short-circuit sentinel), i.e. it was
|
||||
// NOT aborted early by the already-canceled request context.
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring("router short-circuit (test)"))
|
||||
|
||||
routerCtx := routerCtxOf()
|
||||
Expect(routerCtx).ToNot(BeNil(), "router callback must have been invoked")
|
||||
Expect(routerCtx.Err()).To(BeNil(),
|
||||
"a canceled request must not cancel the model load")
|
||||
// The routing holder value still propagates despite the decoupling.
|
||||
stampViaRouterCtx()
|
||||
})
|
||||
|
||||
It("does NOT leak the holder when the app context is used instead", func() {
|
||||
// Sanity: the bug being fixed manifests as the router getting
|
||||
// appCfg.Context (no holder) instead of reqCtx (holder). A direct
|
||||
|
||||
@@ -40,10 +40,14 @@ func (e *modelEmbedder) Embed(ctx context.Context, text string) ([]float32, erro
|
||||
|
||||
func ModelEmbedding(ctx context.Context, s string, tokens []int, loader *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
|
||||
|
||||
// model.WithContext(ctx) overrides the app-context default set in
|
||||
// ModelOptions so distributed routing decisions reach the request's
|
||||
// X-LocalAI-Node holder via distributedhdr.Stamp.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
// model.WithContext carries the request context into the load so distributed
|
||||
// routing decisions reach the request's X-LocalAI-Node holder via
|
||||
// distributedhdr.Stamp. context.WithoutCancel keeps those values but drops
|
||||
// the request's cancellation, so a slow first load still completes and
|
||||
// caches if the client disconnects instead of aborting the LoadModel RPC and
|
||||
// tearing down the backend process (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
|
||||
inferenceModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
|
||||
@@ -13,10 +13,14 @@ import (
|
||||
|
||||
func ImageGeneration(ctx context.Context, height, width, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig, refImages []string) (func() error, error) {
|
||||
|
||||
// model.WithContext(ctx) overrides the app-context default set in
|
||||
// ModelOptions so distributed routing decisions reach the request's
|
||||
// X-LocalAI-Node holder via distributedhdr.Stamp.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
// model.WithContext carries the request context into the load so distributed
|
||||
// routing decisions reach the request's X-LocalAI-Node holder via
|
||||
// distributedhdr.Stamp. context.WithoutCancel keeps those values but drops
|
||||
// the request's cancellation, so a slow first load still completes and
|
||||
// caches if the client disconnects instead of aborting the LoadModel RPC and
|
||||
// tearing down the backend process (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
inferenceModel, err := loader.Load(
|
||||
opts...,
|
||||
)
|
||||
|
||||
@@ -111,7 +111,12 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
|
||||
}
|
||||
ctx = distributedhdr.MaybeWithPrefixChain(ctx, c.ModelID(), chainSource)
|
||||
|
||||
opts := ModelOptions(*c, o, model.WithContext(ctx))
|
||||
// context.WithoutCancel decouples the model load from the request's
|
||||
// cancellation while preserving its routing values, so a slow load still
|
||||
// completes and caches if the client disconnects instead of aborting the
|
||||
// LoadModel RPC mid-load (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(*c, o, model.WithContext(context.WithoutCancel(ctx)))
|
||||
inferenceModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
recordModelLoadFailure(o, c.Name, c.Backend, err, map[string]any{"model_file": modelFile})
|
||||
|
||||
@@ -57,10 +57,14 @@ func (r *modelReranker) Rerank(ctx context.Context, query string, documents []st
|
||||
}
|
||||
|
||||
func Rerank(ctx context.Context, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, modelConfig config.ModelConfig) (*proto.RerankResult, error) {
|
||||
// model.WithContext(ctx) overrides the app-context default set in
|
||||
// ModelOptions so distributed routing decisions reach the request's
|
||||
// X-LocalAI-Node holder via distributedhdr.Stamp.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
// model.WithContext carries the request context into the load so distributed
|
||||
// routing decisions reach the request's X-LocalAI-Node holder via
|
||||
// distributedhdr.Stamp. context.WithoutCancel keeps those values but drops
|
||||
// the request's cancellation, so a slow first load still completes and
|
||||
// caches if the client disconnects instead of aborting the LoadModel RPC and
|
||||
// tearing down the backend process (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
rerankModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
|
||||
|
||||
@@ -45,10 +45,14 @@ func loadTranscriptionModel(ctx context.Context, ml *model.ModelLoader, modelCon
|
||||
if modelConfig.Backend == "" {
|
||||
modelConfig.Backend = model.WhisperBackend
|
||||
}
|
||||
// model.WithContext(ctx) overrides the app-context default set in
|
||||
// ModelOptions so distributed routing decisions reach the request's
|
||||
// X-LocalAI-Node holder via distributedhdr.Stamp.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
// model.WithContext carries the request context into the load so distributed
|
||||
// routing decisions reach the request's X-LocalAI-Node holder via
|
||||
// distributedhdr.Stamp. context.WithoutCancel keeps those values but drops
|
||||
// the request's cancellation, so a slow first load still completes and
|
||||
// caches if the client disconnects instead of aborting the LoadModel RPC and
|
||||
// tearing down the backend process (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
transcriptionModel, err := ml.Load(opts...)
|
||||
if err != nil {
|
||||
recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
|
||||
|
||||
@@ -50,10 +50,14 @@ func ModelTTS(
|
||||
appConfig *config.ApplicationConfig,
|
||||
modelConfig config.ModelConfig,
|
||||
) (string, *proto.Result, error) {
|
||||
// model.WithContext(ctx) overrides the app-context default set in
|
||||
// ModelOptions so distributed routing decisions reach the request's
|
||||
// X-LocalAI-Node holder via distributedhdr.Stamp.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
// model.WithContext carries the request context into the load so distributed
|
||||
// routing decisions reach the request's X-LocalAI-Node holder via
|
||||
// distributedhdr.Stamp. context.WithoutCancel keeps those values but drops
|
||||
// the request's cancellation, so a slow first load still completes and
|
||||
// caches if the client disconnects instead of aborting the LoadModel RPC and
|
||||
// tearing down the backend process (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
ttsModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
|
||||
@@ -153,7 +157,9 @@ func ModelTTSStream(
|
||||
modelConfig config.ModelConfig,
|
||||
audioCallback func([]byte) error,
|
||||
) error {
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
// See ModelTTS above: WithoutCancel decouples the load from request
|
||||
// cancellation while preserving routing values (issue #10636).
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
ttsModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
|
||||
|
||||
@@ -14,10 +14,14 @@ func VAD(request *schema.VADRequest,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
modelConfig config.ModelConfig) (*schema.VADResponse, error) {
|
||||
// model.WithContext(ctx) overrides the app-context default set in
|
||||
// ModelOptions so distributed routing decisions reach the request's
|
||||
// X-LocalAI-Node holder via distributedhdr.Stamp.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
// model.WithContext carries the request context into the load so distributed
|
||||
// routing decisions reach the request's X-LocalAI-Node holder via
|
||||
// distributedhdr.Stamp. context.WithoutCancel keeps those values but drops
|
||||
// the request's cancellation, so a slow first load still completes and
|
||||
// caches if the client disconnects instead of aborting the LoadModel RPC and
|
||||
// tearing down the backend process (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
vadModel, err := ml.Load(opts...)
|
||||
if err != nil {
|
||||
recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
|
||||
|
||||
@@ -1,133 +0,0 @@
|
||||
import { test, expect } from './coverage-fixtures.js'
|
||||
|
||||
// Seeds two-message chat into localStorage so we don't need a live model.
|
||||
async function seedChat(page, history) {
|
||||
await page.addInitScript((h) => {
|
||||
const chat = {
|
||||
id: 'seed1', name: 'Seeded Chat', model: 'test-model',
|
||||
history: h, systemPrompt: '', mcpMode: false, mcpServers: [],
|
||||
clientMCPServers: [], temperature: null, topP: null, topK: null,
|
||||
tokenUsage: { prompt: 0, completion: 0, total: 0 },
|
||||
contextSize: null, createdAt: Date.now(), updatedAt: Date.now(),
|
||||
}
|
||||
localStorage.setItem('localai_chats_data', JSON.stringify({
|
||||
chats: [chat], activeChatId: 'seed1', lastSaved: Date.now(),
|
||||
}))
|
||||
}, history)
|
||||
}
|
||||
|
||||
async function mockModels(page) {
|
||||
await page.route('**/api/models/capabilities', (route) => route.fulfill({
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({ data: [{ id: 'test-model', capabilities: ['FLAG_CHAT'] }] }),
|
||||
}))
|
||||
await page.route('**/api/operations', (route) => route.fulfill({
|
||||
contentType: 'application/json', body: JSON.stringify({ operations: [] }),
|
||||
}))
|
||||
}
|
||||
|
||||
const TWO_TURNS = [
|
||||
{ role: 'user', content: 'first question' },
|
||||
{ role: 'assistant', content: 'first answer' },
|
||||
{ role: 'user', content: 'second question' },
|
||||
{ role: 'assistant', content: 'second answer' },
|
||||
]
|
||||
|
||||
test('duplicate creates an independent copy and switches to it', async ({ page }) => {
|
||||
await mockModels(page)
|
||||
await seedChat(page, TWO_TURNS)
|
||||
await page.goto('/app/chat')
|
||||
|
||||
// Open the chats menu (Ctrl/Cmd+K) and duplicate the seeded chat.
|
||||
// Wait for the menu trigger to mount so its global keydown listener is armed
|
||||
// before we dispatch the shortcut.
|
||||
await page.getByTitle('Conversations (Ctrl/Cmd+K)').waitFor()
|
||||
await page.keyboard.press('Control+k')
|
||||
await page.getByTitle('Duplicate chat').first().click()
|
||||
|
||||
// A new active chat named "Seeded Chat (fork)" with the same 4 messages.
|
||||
await expect(page.locator('.chat-header-title')).toHaveText('Seeded Chat (fork)')
|
||||
await expect(page.locator('.chat-message-user')).toHaveCount(2)
|
||||
await expect(page.locator('.chat-message-assistant')).toHaveCount(2)
|
||||
})
|
||||
|
||||
async function mockCompletion(page, replyText) {
|
||||
await page.route('**/v1/chat/completions', (route) => {
|
||||
const sse =
|
||||
`data: ${JSON.stringify({ choices: [{ delta: { content: replyText } }] })}\n\n` +
|
||||
`data: ${JSON.stringify({ choices: [{ delta: {}, finish_reason: 'stop' }], usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 } })}\n\n` +
|
||||
`data: [DONE]\n\n`
|
||||
route.fulfill({ status: 200, contentType: 'text/event-stream', body: sse })
|
||||
})
|
||||
}
|
||||
|
||||
test('retry regenerates the first answer and drops the later turn', async ({ page }) => {
|
||||
await mockModels(page)
|
||||
// Capture the outbound request body so we can assert the model receives the
|
||||
// truncated history (not the stale downstream turns).
|
||||
let sentMessages = null
|
||||
await page.route('**/v1/chat/completions', (route) => {
|
||||
sentMessages = route.request().postDataJSON()?.messages || []
|
||||
const sse =
|
||||
`data: ${JSON.stringify({ choices: [{ delta: { content: 'REGENERATED first answer' } }] })}\n\n` +
|
||||
`data: ${JSON.stringify({ choices: [{ delta: {}, finish_reason: 'stop' }], usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 } })}\n\n` +
|
||||
`data: [DONE]\n\n`
|
||||
route.fulfill({ status: 200, contentType: 'text/event-stream', body: sse })
|
||||
})
|
||||
await seedChat(page, TWO_TURNS)
|
||||
await page.goto('/app/chat')
|
||||
|
||||
// Hover the FIRST assistant message and click its retry button.
|
||||
const firstAssistant = page.locator('.chat-message-assistant').first()
|
||||
await firstAssistant.hover()
|
||||
await firstAssistant.getByTitle('Regenerate').click()
|
||||
|
||||
// History is truncated to the first user turn, then the new answer streams in;
|
||||
// the second Q/A turn is gone.
|
||||
await expect(page.locator('.chat-message-assistant')).toContainText(['REGENERATED first answer'])
|
||||
await expect(page.locator('.chat-message-user')).toHaveCount(1)
|
||||
await expect(page.locator('.chat-message-assistant')).toHaveCount(1)
|
||||
|
||||
// The OUTBOUND payload must also be truncated: the resent user turn is present,
|
||||
// but the downstream turn and the stale first answer must be gone.
|
||||
const contents = (sentMessages || []).map(m =>
|
||||
typeof m.content === 'string' ? m.content : JSON.stringify(m.content)
|
||||
)
|
||||
expect(contents.join('\n')).toContain('first question')
|
||||
expect(contents.join('\n')).not.toContain('second question')
|
||||
expect(contents.join('\n')).not.toContain('first answer')
|
||||
})
|
||||
|
||||
test('copy chat puts the whole conversation on the clipboard', async ({ page, context }) => {
|
||||
await context.grantPermissions(['clipboard-read', 'clipboard-write'])
|
||||
await mockModels(page)
|
||||
await seedChat(page, TWO_TURNS)
|
||||
await page.goto('/app/chat')
|
||||
|
||||
// Wait for the menu trigger to mount so its global keydown listener is armed
|
||||
// before we dispatch the shortcut (same mount-race guard as the duplicate test).
|
||||
await page.getByTitle('Conversations (Ctrl/Cmd+K)').waitFor()
|
||||
await page.keyboard.press('Control+k')
|
||||
await page.getByTitle('Copy chat').first().click()
|
||||
|
||||
const clip = await page.evaluate(() => navigator.clipboard.readText())
|
||||
expect(clip).toContain('# Seeded Chat')
|
||||
expect(clip).toContain('first answer')
|
||||
expect(clip).toContain('second answer')
|
||||
})
|
||||
|
||||
test('branch from the first answer forks history up to that point', async ({ page }) => {
|
||||
await mockModels(page)
|
||||
await seedChat(page, TWO_TURNS)
|
||||
await page.goto('/app/chat')
|
||||
|
||||
const firstAssistant = page.locator('.chat-message-assistant').first()
|
||||
await firstAssistant.hover()
|
||||
await firstAssistant.getByTitle('Branch from here').click()
|
||||
|
||||
// New active chat "Seeded Chat (fork)" contains only the first Q/A turn.
|
||||
await expect(page.locator('.chat-header-title')).toHaveText('Seeded Chat (fork)')
|
||||
await expect(page.locator('.chat-message-user')).toHaveCount(1)
|
||||
await expect(page.locator('.chat-message-assistant')).toHaveCount(1)
|
||||
await expect(page.locator('.chat-message-assistant')).toContainText(['first answer'])
|
||||
})
|
||||
@@ -72,7 +72,6 @@
|
||||
"actions": {
|
||||
"copy": "Copy",
|
||||
"regenerate": "Regenerate",
|
||||
"branch": "Branch from here",
|
||||
"jumpToLatest": "Jump to latest"
|
||||
},
|
||||
"streaming": {
|
||||
@@ -101,9 +100,7 @@
|
||||
"toasts": {
|
||||
"selectModel": "Please select a model",
|
||||
"copied": "Copied to clipboard",
|
||||
"copyFailed": "Could not copy to clipboard",
|
||||
"chatCopied": "Chat copied to clipboard",
|
||||
"forked": "Created a new chat"
|
||||
"copyFailed": "Could not copy to clipboard"
|
||||
},
|
||||
"menu": {
|
||||
"trigger": "Chats",
|
||||
@@ -113,8 +110,6 @@
|
||||
"noMatch": "No conversations match your search",
|
||||
"noConversations": "No conversations yet",
|
||||
"rename": "Rename",
|
||||
"duplicate": "Duplicate chat",
|
||||
"copyChat": "Copy chat",
|
||||
"exportMarkdown": "Export as Markdown",
|
||||
"deleteChat": "Delete chat",
|
||||
"newChat": "New chat",
|
||||
|
||||
@@ -24,8 +24,6 @@ const ChatsMenu = forwardRef(function ChatsMenu({
|
||||
onDeleteAll,
|
||||
onRename,
|
||||
onExport,
|
||||
onCopyChat,
|
||||
onDuplicate,
|
||||
}, ref) {
|
||||
const { t } = useTranslation('chat')
|
||||
const [open, setOpen] = useState(false)
|
||||
@@ -232,24 +230,6 @@ const ChatsMenu = forwardRef(function ChatsMenu({
|
||||
>
|
||||
<i className="fas fa-pen" />
|
||||
</button>
|
||||
{onDuplicate && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={(e) => { e.stopPropagation(); onDuplicate(chat); setOpen(false) }}
|
||||
title={t('menu.duplicate')}
|
||||
>
|
||||
<i className="fas fa-clone" />
|
||||
</button>
|
||||
)}
|
||||
{(chat.history?.length || 0) > 0 && onCopyChat && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={(e) => { e.stopPropagation(); onCopyChat(chat) }}
|
||||
title={t('menu.copyChat')}
|
||||
>
|
||||
<i className="fas fa-clipboard" />
|
||||
</button>
|
||||
)}
|
||||
{(chat.history?.length || 0) > 0 && onExport && (
|
||||
<button
|
||||
type="button"
|
||||
|
||||
27
core/http/react-ui/src/hooks/useChat.js
vendored
27
core/http/react-ui/src/hooks/useChat.js
vendored
@@ -141,24 +141,6 @@ export function useChat(initialModel = '') {
|
||||
return chat
|
||||
}, [])
|
||||
|
||||
const forkChat = useCallback((chatId, uptoIndex) => {
|
||||
const src = chats.find(c => c.id === chatId)
|
||||
if (!src) return null
|
||||
const end = typeof uptoIndex === 'number' ? uptoIndex : src.history.length
|
||||
const forked = {
|
||||
...src,
|
||||
id: generateId(),
|
||||
name: `${src.name} (fork)`,
|
||||
history: structuredClone(src.history.slice(0, end)),
|
||||
tokenUsage: { prompt: 0, completion: 0, total: 0 },
|
||||
createdAt: Date.now(),
|
||||
updatedAt: Date.now(),
|
||||
}
|
||||
setChats(prev => [forked, ...prev])
|
||||
setActiveChatId(forked.id)
|
||||
return forked
|
||||
}, [chats])
|
||||
|
||||
const switchChat = useCallback((chatId) => {
|
||||
setActiveChatId(chatId)
|
||||
setStreamingContent('')
|
||||
@@ -278,12 +260,8 @@ export function useChat(initialModel = '') {
|
||||
if (chat?.systemPrompt) {
|
||||
messages.push({ role: 'system', content: chat.systemPrompt })
|
||||
}
|
||||
// Filter out thinking/reasoning/tool_call/tool_result messages.
|
||||
// options.baseHistory lets callers (e.g. mid-conversation retry) pass the
|
||||
// intended truncated history synchronously; the closure `chat` still holds
|
||||
// the stale pre-truncation state because setChats only schedules an update.
|
||||
const baseHistory = options.baseHistory || chat?.history || []
|
||||
const historyForApi = baseHistory.filter(m =>
|
||||
// Filter out thinking/reasoning/tool_call/tool_result messages
|
||||
const historyForApi = (chat?.history || []).filter(m =>
|
||||
m.role !== 'thinking' && m.role !== 'reasoning' && m.role !== 'tool_call' && m.role !== 'tool_result'
|
||||
)
|
||||
messages.push(...historyForApi, { role: 'user', content: messageContent })
|
||||
@@ -815,7 +793,6 @@ export function useChat(initialModel = '') {
|
||||
tokensPerSecond,
|
||||
maxTokensPerSecond,
|
||||
addChat,
|
||||
forkChat,
|
||||
switchChat,
|
||||
deleteChat,
|
||||
deleteAllChats,
|
||||
|
||||
@@ -33,7 +33,7 @@ function getLastMessagePreview(chat) {
|
||||
return ''
|
||||
}
|
||||
|
||||
function serializeChatAsMarkdown(chat) {
|
||||
function exportChatAsMarkdown(chat) {
|
||||
let md = `# ${chat.name}\n\n`
|
||||
md += `Model: ${chat.model || 'Unknown'}\n`
|
||||
md += `Date: ${new Date(chat.createdAt).toLocaleString()}\n\n---\n\n`
|
||||
@@ -47,11 +47,7 @@ function serializeChatAsMarkdown(chat) {
|
||||
md += `<details><summary>Thinking</summary>\n\n${msg.content}\n\n</details>\n\n`
|
||||
}
|
||||
}
|
||||
return md
|
||||
}
|
||||
|
||||
function downloadChatAsMarkdown(chat) {
|
||||
const blob = new Blob([serializeChatAsMarkdown(chat)], { type: 'text/markdown' })
|
||||
const blob = new Blob([md], { type: 'text/markdown' })
|
||||
const url = URL.createObjectURL(blob)
|
||||
const a = document.createElement('a')
|
||||
a.href = url
|
||||
@@ -298,7 +294,7 @@ export default function Chat() {
|
||||
const {
|
||||
chats, activeChat, activeChatId, isStreaming, streamingChatId, streamingContent,
|
||||
streamingReasoning, streamingToolCalls, tokensPerSecond, maxTokensPerSecond,
|
||||
addChat, forkChat, switchChat, deleteChat, deleteAllChats, renameChat, updateChatSettings,
|
||||
addChat, switchChat, deleteChat, deleteAllChats, renameChat, updateChatSettings,
|
||||
sendMessage, stopGeneration, clearHistory, getContextUsagePercent, addMessage,
|
||||
} = useChat(urlModel || '')
|
||||
|
||||
@@ -799,27 +795,34 @@ export default function Chat() {
|
||||
await sendMessage(msg, files, mcpOptions)
|
||||
}, [input, files, activeChat, sendMessage, addToast, getToolsForLLM, isClientTool, executeTool, hasAppUI, getAppResource, getToolDefinition])
|
||||
|
||||
const handleRegenerate = useCallback(async (targetIndex) => {
|
||||
const handleRegenerate = useCallback(async () => {
|
||||
if (!activeChat || isStreaming) return
|
||||
const history = activeChat.history
|
||||
const end = typeof targetIndex === 'number' ? targetIndex : history.length
|
||||
// Nearest user message at or before the target answer.
|
||||
let userIdx = -1
|
||||
for (let i = Math.min(end, history.length) - 1; i >= 0; i--) {
|
||||
if (history[i].role === 'user') { userIdx = i; break }
|
||||
let lastUserMsg = null
|
||||
let lastUserFiles = null
|
||||
for (let i = history.length - 1; i >= 0; i--) {
|
||||
if (history[i].role === 'user') {
|
||||
lastUserMsg = typeof history[i].content === 'string' ? history[i].content : history[i].content?.[0]?.text || ''
|
||||
lastUserFiles = history[i].files || []
|
||||
break
|
||||
}
|
||||
}
|
||||
if (userIdx === -1) return
|
||||
const userMsg = typeof history[userIdx].content === 'string'
|
||||
? history[userIdx].content
|
||||
: history[userIdx].content?.[0]?.text || ''
|
||||
const userFiles = history[userIdx].files || []
|
||||
// Drop the user turn and everything after it; sendMessage re-appends it.
|
||||
// Thread the truncated history through explicitly: updateChatSettings only
|
||||
// schedules a state update, so sendMessage's closure would otherwise read
|
||||
// the stale pre-truncation history for the outbound API payload.
|
||||
const baseHistory = history.slice(0, userIdx)
|
||||
updateChatSettings(activeChat.id, { history: baseHistory })
|
||||
await sendMessage(userMsg, userFiles, { baseHistory })
|
||||
if (!lastUserMsg) return
|
||||
|
||||
// Remove everything after and including the last user message
|
||||
const newHistory = []
|
||||
let foundLastUser = false
|
||||
for (let i = history.length - 1; i >= 0; i--) {
|
||||
if (!foundLastUser && history[i].role === 'user') {
|
||||
foundLastUser = true
|
||||
continue
|
||||
}
|
||||
if (foundLastUser) {
|
||||
newHistory.unshift(history[i])
|
||||
}
|
||||
}
|
||||
updateChatSettings(activeChat.id, { history: newHistory })
|
||||
await sendMessage(lastUserMsg, lastUserFiles)
|
||||
}, [activeChat, isStreaming, sendMessage, updateChatSettings])
|
||||
|
||||
const handleKeyDown = (e) => {
|
||||
@@ -849,11 +852,6 @@ export default function Chat() {
|
||||
}
|
||||
}
|
||||
|
||||
const copyChatAsMarkdown = async (chat) => {
|
||||
const ok = await copyToClipboard(serializeChatAsMarkdown(chat))
|
||||
addToast(ok ? t('toasts.chatCopied') : t('toasts.copyFailed'), ok ? 'success' : 'error', ok ? 2000 : 3000)
|
||||
}
|
||||
|
||||
const contextPercent = getContextUsagePercent()
|
||||
|
||||
// Recent chats for the empty state — exclude the current chat and any
|
||||
@@ -894,9 +892,7 @@ export default function Chat() {
|
||||
onDelete={deleteChat}
|
||||
onDeleteAll={promptDeleteAll}
|
||||
onRename={renameChat}
|
||||
onExport={(chat) => downloadChatAsMarkdown(chat)}
|
||||
onCopyChat={(chat) => copyChatAsMarkdown(chat)}
|
||||
onDuplicate={(chat) => { if (forkChat(chat.id)) addToast(t('toasts.forked'), 'success', 2000) }}
|
||||
onExport={(chat) => exportChatAsMarkdown(chat)}
|
||||
/>
|
||||
{activeChat.localaiAssistant && (
|
||||
<span
|
||||
@@ -1188,19 +1184,11 @@ export default function Chat() {
|
||||
<button onClick={() => copyMessage(msg.content)} title={t('actions.copy')}>
|
||||
<i className="fas fa-copy" />
|
||||
</button>
|
||||
{msg.role === 'assistant' && !isStreaming && (
|
||||
<button onClick={() => handleRegenerate(i)} title={t('actions.regenerate')}>
|
||||
{msg.role === 'assistant' && i === activeChat.history.length - 1 && !isStreaming && (
|
||||
<button onClick={handleRegenerate} title={t('actions.regenerate')}>
|
||||
<i className="fas fa-rotate" />
|
||||
</button>
|
||||
)}
|
||||
{msg.role === 'assistant' && !isStreaming && (
|
||||
<button
|
||||
onClick={() => { forkChat(activeChat.id, i + 1); addToast(t('toasts.forked'), 'success', 2000) }}
|
||||
title={t('actions.branch')}
|
||||
>
|
||||
<i className="fas fa-code-branch" />
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user