mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-03 04:46:54 -04:00
Compare commits
9 Commits
worktree-i
...
fix/mlx-to
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4bf73a7e22 | ||
|
|
715d4ed8e5 | ||
|
|
9fcc9c0d43 | ||
|
|
3c67b5b746 | ||
|
|
bea66fd84e | ||
|
|
f7a5dfd5ae | ||
|
|
6bcaf30c14 | ||
|
|
ef15b4bfda | ||
|
|
237bce48e8 |
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=068b173649f2fd8dc96b35ada5a0b76d8985105d
|
||||
IK_LLAMA_VERSION?=87fc8701ff4da81a7d2a91ec0695f95eb3066a47
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=4fc4ec5541b243957ae5099edb67372f8f3b550e
|
||||
LLAMA_VERSION?=fdb1db877c526ec90f668eca1b858da5dba85560
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
# Local development: point at a working checkout instead of cloning, e.g.
|
||||
# make PRIVACY_FILTER_SRC=$HOME/c/privacy-filter.cpp grpc-server
|
||||
|
||||
PRIVACY_FILTER_VERSION?=595f59630c69d361b5196f2aba2c71c873d0c13c
|
||||
PRIVACY_FILTER_VERSION?=735a6c28607ee82afc3a670383f41b55266a3b9a
|
||||
PRIVACY_FILTER_REPO?=https://github.com/localai-org/privacy-filter.cpp
|
||||
PRIVACY_FILTER_SRC?=
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# CrispASR version (release tag)
|
||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||
CRISPASR_VERSION?=fcbc8718e654995e3bd2d0c98bcb8e55e297d23c
|
||||
CRISPASR_VERSION?=9a26976a8c8cf5af0afcdd04463cf8ba91e96a54
|
||||
SO_TARGET?=libgocrispasr.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=3590aa8d626e671a1b1dc84506ea2932a243a480
|
||||
STABLEDIFFUSION_GGML_VERSION?=2574f5936571645f784b77623e1f09bad97d948a
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
@@ -20,7 +20,15 @@ def split_reasoning(text, think_start, think_end):
|
||||
Returns ``(reasoning_content, remaining_text)``. When ``think_start`` is
|
||||
empty or not found, returns ``("", text)`` unchanged.
|
||||
"""
|
||||
if not think_start or not text or think_start not in text:
|
||||
if not think_start or not text:
|
||||
return "", text
|
||||
if think_start not in text:
|
||||
# Models like Qwen3.5 open assistant turns already INSIDE thinking, so
|
||||
# the generated text carries only the closing tag. Everything before it
|
||||
# is reasoning that would otherwise leak into the content.
|
||||
if think_end and think_end in text:
|
||||
head, _, tail = text.partition(think_end)
|
||||
return head.strip(), tail.strip()
|
||||
return "", text
|
||||
pattern = re.compile(
|
||||
re.escape(think_start) + r"(.*?)" + re.escape(think_end or ""),
|
||||
|
||||
75
backend/python/common/mlx_utils_test.py
Normal file
75
backend/python/common/mlx_utils_test.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""Unit tests for the mlx/mlx-vlm shared helpers (mlx_utils.py).
|
||||
|
||||
Run standalone (Python standard library only, no backend venv needed):
|
||||
python3 -m unittest mlx_utils_test
|
||||
|
||||
These mirror the server-less helper tests in backend/python/mlx/test.py
|
||||
(TestSharedHelpers), but live here so they run on any platform: the mlx
|
||||
test module imports grpc/backend_pb2 at import time and needs the MLX venv,
|
||||
whereas mlx_utils only needs the standard library.
|
||||
"""
|
||||
|
||||
import types
|
||||
import unittest
|
||||
|
||||
from mlx_utils import parse_tool_calls, split_reasoning
|
||||
|
||||
|
||||
class TestSplitReasoning(unittest.TestCase):
|
||||
def test_both_tags(self):
|
||||
r, c = split_reasoning(
|
||||
"<think>step 1\nstep 2</think>The answer is 42.", "<think>", "</think>"
|
||||
)
|
||||
self.assertEqual(r, "step 1\nstep 2")
|
||||
self.assertEqual(c, "The answer is 42.")
|
||||
|
||||
def test_implicit_opener_only_closing_tag(self):
|
||||
# Qwen3.5 opens the assistant turn already inside thinking, so the
|
||||
# output carries only the closing tag; everything before it is reasoning.
|
||||
r, c = split_reasoning(
|
||||
"The user is asking about the weather.\n</think>\n\nThe weather in Rome is sunny.",
|
||||
"<think>",
|
||||
"</think>",
|
||||
)
|
||||
self.assertEqual(r, "The user is asking about the weather.")
|
||||
self.assertEqual(c, "The weather in Rome is sunny.")
|
||||
|
||||
def test_no_tags_at_all(self):
|
||||
r, c = split_reasoning("just text", "<think>", "</think>")
|
||||
self.assertEqual(r, "")
|
||||
self.assertEqual(c, "just text")
|
||||
|
||||
def test_empty_think_end_and_no_opener_match(self):
|
||||
# No think_end to anchor on, and the opener is absent → return unchanged.
|
||||
r, c = split_reasoning("no opener here", "<think>", "")
|
||||
self.assertEqual(r, "")
|
||||
self.assertEqual(c, "no opener here")
|
||||
|
||||
def test_empty_text(self):
|
||||
r, c = split_reasoning("", "<think>", "</think>")
|
||||
self.assertEqual(r, "")
|
||||
self.assertEqual(c, "")
|
||||
|
||||
|
||||
class TestParseToolCalls(unittest.TestCase):
|
||||
def test_with_shim(self):
|
||||
tm = types.SimpleNamespace(
|
||||
tool_call_start="<tool_call>",
|
||||
tool_call_end="</tool_call>",
|
||||
parse_tool_call=lambda body, tools: {
|
||||
"name": "get_weather",
|
||||
"arguments": {"location": body.strip()},
|
||||
},
|
||||
)
|
||||
calls, remaining = parse_tool_calls(
|
||||
"Sure: <tool_call>Paris</tool_call>", tm, tools=None
|
||||
)
|
||||
self.assertEqual(len(calls), 1)
|
||||
self.assertEqual(calls[0]["name"], "get_weather")
|
||||
self.assertEqual(calls[0]["arguments"], '{"location": "Paris"}')
|
||||
self.assertEqual(calls[0]["index"], 0)
|
||||
self.assertNotIn("<tool_call>", remaining)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -58,7 +58,18 @@ def messages_to_dicts(proto_messages):
|
||||
d["reasoning_content"] = msg.reasoning_content
|
||||
if msg.tool_calls:
|
||||
try:
|
||||
d["tool_calls"] = json.loads(msg.tool_calls)
|
||||
tool_calls = json.loads(msg.tool_calls)
|
||||
# Chat templates (e.g. Qwen) iterate function.arguments as a
|
||||
# mapping, but the OpenAI wire format carries it as a JSON
|
||||
# string — decode it back so the template's .items() works.
|
||||
for tc in tool_calls:
|
||||
fn = tc.get("function") if isinstance(tc, dict) else None
|
||||
if isinstance(fn, dict) and isinstance(fn.get("arguments"), str):
|
||||
try:
|
||||
fn["arguments"] = json.loads(fn["arguments"])
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
d["tool_calls"] = tool_calls
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
result.append(d)
|
||||
|
||||
122
backend/python/common/python_utils_test.py
Normal file
122
backend/python/common/python_utils_test.py
Normal file
@@ -0,0 +1,122 @@
|
||||
"""Unit tests for the shared python backend helpers (python_utils.py).
|
||||
|
||||
Run standalone (Python standard library only, no backend venv needed):
|
||||
python3 -m unittest python_utils_test
|
||||
|
||||
These mirror the server-less helper tests in backend/python/mlx/test.py
|
||||
(TestSharedHelpers), but live here so they run on any platform: the mlx
|
||||
test module imports grpc/backend_pb2 at import time and needs the MLX venv,
|
||||
whereas python_utils has no third-party dependency. Proto Message objects
|
||||
are faked with types.SimpleNamespace (real proto fields default to "").
|
||||
"""
|
||||
|
||||
import json
|
||||
import types
|
||||
import unittest
|
||||
|
||||
from python_utils import messages_to_dicts, parse_options
|
||||
|
||||
|
||||
def _msg(**fields):
|
||||
"""Fake a proto Message: every unset field is the empty string, as protobuf."""
|
||||
defaults = {
|
||||
"role": "",
|
||||
"content": "",
|
||||
"name": "",
|
||||
"tool_call_id": "",
|
||||
"reasoning_content": "",
|
||||
"tool_calls": "",
|
||||
}
|
||||
defaults.update(fields)
|
||||
return types.SimpleNamespace(**defaults)
|
||||
|
||||
|
||||
class TestParseOptions(unittest.TestCase):
|
||||
def test_type_inference(self):
|
||||
opts = parse_options(
|
||||
["temperature:0.7", "max_tokens:128", "trust:true", "name:hello", "no_colon_skipped"]
|
||||
)
|
||||
self.assertEqual(opts["temperature"], 0.7)
|
||||
self.assertEqual(opts["max_tokens"], 128)
|
||||
self.assertIs(opts["trust"], True)
|
||||
self.assertEqual(opts["name"], "hello")
|
||||
self.assertNotIn("no_colon_skipped", opts)
|
||||
|
||||
|
||||
class TestMessagesToDicts(unittest.TestCase):
|
||||
def test_basic_fields(self):
|
||||
out = messages_to_dicts(
|
||||
[
|
||||
_msg(role="user", content="hi"),
|
||||
_msg(role="tool", content="42", tool_call_id="call_1", name="f"),
|
||||
]
|
||||
)
|
||||
self.assertEqual(out[0], {"role": "user", "content": "hi"})
|
||||
self.assertEqual(out[1]["tool_call_id"], "call_1")
|
||||
self.assertEqual(out[1]["name"], "f")
|
||||
|
||||
def test_tool_call_arguments_string_decoded_to_mapping(self):
|
||||
# OpenAI wire format ships function.arguments as a JSON *string*; chat
|
||||
# templates iterate it as a mapping, so it must come back as a dict.
|
||||
out = messages_to_dicts(
|
||||
[
|
||||
_msg(
|
||||
role="assistant",
|
||||
tool_calls=json.dumps(
|
||||
[
|
||||
{
|
||||
"id": "call_1",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"arguments": '{"location": "Rome"}',
|
||||
},
|
||||
}
|
||||
]
|
||||
),
|
||||
)
|
||||
]
|
||||
)
|
||||
args = out[0]["tool_calls"][0]["function"]["arguments"]
|
||||
self.assertEqual(args, {"location": "Rome"})
|
||||
self.assertEqual(dict(args.items()), {"location": "Rome"})
|
||||
|
||||
def test_tool_call_arguments_already_mapping_is_idempotent(self):
|
||||
out = messages_to_dicts(
|
||||
[
|
||||
_msg(
|
||||
role="assistant",
|
||||
tool_calls=json.dumps(
|
||||
[{"function": {"name": "f", "arguments": {"a": 1}}}]
|
||||
),
|
||||
)
|
||||
]
|
||||
)
|
||||
self.assertEqual(out[0]["tool_calls"][0]["function"]["arguments"], {"a": 1})
|
||||
|
||||
def test_tool_call_arguments_invalid_json_left_as_string(self):
|
||||
out = messages_to_dicts(
|
||||
[
|
||||
_msg(
|
||||
role="assistant",
|
||||
tool_calls=json.dumps(
|
||||
[{"function": {"name": "f", "arguments": "not-json"}}]
|
||||
),
|
||||
)
|
||||
]
|
||||
)
|
||||
self.assertEqual(out[0]["tool_calls"][0]["function"]["arguments"], "not-json")
|
||||
|
||||
def test_tool_call_without_function_key(self):
|
||||
out = messages_to_dicts(
|
||||
[_msg(role="assistant", tool_calls=json.dumps([{"id": "call_1"}]))]
|
||||
)
|
||||
self.assertEqual(out[0]["tool_calls"], [{"id": "call_1"}])
|
||||
|
||||
def test_tool_calls_invalid_json_dropped(self):
|
||||
out = messages_to_dicts([_msg(role="assistant", tool_calls="{not json")])
|
||||
self.assertNotIn("tool_calls", out[0])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -35,6 +35,21 @@ if [ "x${BUILD_PROFILE}" == "xcpu" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-best-match"
|
||||
fi
|
||||
|
||||
# AMD ROCm: vLLM ships prebuilt ROCm wheels, but on a DEDICATED index
|
||||
# (https://wheels.vllm.ai/rocm/), NOT PyPI, and ONLY for CPython 3.12. On any
|
||||
# other Python the installer silently falls back to the CUDA-only PyPI wheel,
|
||||
# which is unusable on an AMD GPU (import fails, so the backend never finds the
|
||||
# vllm module). Force Python 3.12 before the venv is created (matches the
|
||||
# intel/l4t13 cp312 bump); the hipblas branch below pulls vllm from the ROCm
|
||||
# wheel index. unsafe-best-match lets uv consult that index and PyPI together.
|
||||
# https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm
|
||||
if [ "x${BUILD_TYPE}" == "xhipblas" ]; then
|
||||
PYTHON_VERSION="3.12"
|
||||
PYTHON_PATCH="12"
|
||||
PY_STANDALONE_TAG="20251120"
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-best-match"
|
||||
fi
|
||||
|
||||
# cublas13 pulls the vLLM wheel from a per-tag cu130 index (PyPI's vllm wheel
|
||||
# is built against CUDA 12 and won't load on cu130). uv's default per-package
|
||||
# first-match strategy would still pick the PyPI wheel, so allow it to consult
|
||||
@@ -104,7 +119,7 @@ if [ "$(uname -s)" = "Darwin" ]; then
|
||||
# can rewrite it. Darwin therefore follows vllm-metal and can lag the Linux
|
||||
# vllm pin (requirements-cublas13-after.txt, bumped independently against
|
||||
# vllm/vllm) until vllm-metal supports a newer vLLM.
|
||||
VLLM_METAL_VERSION="v0.3.0.dev20260701132215"
|
||||
VLLM_METAL_VERSION="v0.3.0.dev20260701212152"
|
||||
|
||||
# The coupled vLLM source version is whatever this vllm-metal release builds
|
||||
# against -- it declares it in its own installer as `vllm_v=`. Derive it from
|
||||
@@ -194,6 +209,22 @@ elif [ "x${BUILD_TYPE}" == "xintel" ]; then
|
||||
export CMAKE_PREFIX_PATH="$(python -c 'import site; print(site.getsitepackages()[0])'):${CMAKE_PREFIX_PATH:-}"
|
||||
VLLM_TARGET_DEVICE=xpu uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --no-deps .
|
||||
popd
|
||||
# AMD ROCm: install vllm from its dedicated ROCm wheel index instead of the
|
||||
# CUDA-only PyPI wheel. installRequirements brings the base ROCm
|
||||
# torch/transformers (requirements-hipblas.txt), then we pull vllm (plus the
|
||||
# matching ROCm torch, via --upgrade) from wheels.vllm.ai/rocm. This is the
|
||||
# method upstream prescribes for AMD; the Python-3.12 pin is set above.
|
||||
# There is intentionally no requirements-hipblas-after.txt: a bare `vllm`
|
||||
# there would resolve to the CUDA wheel, and installRequirements never loads
|
||||
# a ${BUILD_TYPE}-after file for hipblas anyway (BUILD_TYPE == BUILD_PROFILE).
|
||||
# https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm
|
||||
elif [ "x${BUILD_TYPE}" == "xhipblas" ]; then
|
||||
installRequirements
|
||||
|
||||
# --upgrade reconciles the base ROCm torch to whatever the vllm ROCm wheel
|
||||
# pins; --extra-index-url adds the ROCm wheel repository on top of PyPI.
|
||||
uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} \
|
||||
--extra-index-url https://wheels.vllm.ai/rocm/ --upgrade vllm
|
||||
# FROM_SOURCE=true on a CPU build skips the prebuilt vllm wheel in
|
||||
# requirements-cpu-after.txt and compiles vllm locally against the host's
|
||||
# actual CPU. Not used by default because it takes ~30-40 minutes, but
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
vllm
|
||||
133
core/http/react-ui/e2e/forking-chat.spec.js
Normal file
133
core/http/react-ui/e2e/forking-chat.spec.js
Normal file
@@ -0,0 +1,133 @@
|
||||
import { test, expect } from './coverage-fixtures.js'
|
||||
|
||||
// Seeds two-message chat into localStorage so we don't need a live model.
|
||||
async function seedChat(page, history) {
|
||||
await page.addInitScript((h) => {
|
||||
const chat = {
|
||||
id: 'seed1', name: 'Seeded Chat', model: 'test-model',
|
||||
history: h, systemPrompt: '', mcpMode: false, mcpServers: [],
|
||||
clientMCPServers: [], temperature: null, topP: null, topK: null,
|
||||
tokenUsage: { prompt: 0, completion: 0, total: 0 },
|
||||
contextSize: null, createdAt: Date.now(), updatedAt: Date.now(),
|
||||
}
|
||||
localStorage.setItem('localai_chats_data', JSON.stringify({
|
||||
chats: [chat], activeChatId: 'seed1', lastSaved: Date.now(),
|
||||
}))
|
||||
}, history)
|
||||
}
|
||||
|
||||
async function mockModels(page) {
|
||||
await page.route('**/api/models/capabilities', (route) => route.fulfill({
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify({ data: [{ id: 'test-model', capabilities: ['FLAG_CHAT'] }] }),
|
||||
}))
|
||||
await page.route('**/api/operations', (route) => route.fulfill({
|
||||
contentType: 'application/json', body: JSON.stringify({ operations: [] }),
|
||||
}))
|
||||
}
|
||||
|
||||
const TWO_TURNS = [
|
||||
{ role: 'user', content: 'first question' },
|
||||
{ role: 'assistant', content: 'first answer' },
|
||||
{ role: 'user', content: 'second question' },
|
||||
{ role: 'assistant', content: 'second answer' },
|
||||
]
|
||||
|
||||
test('duplicate creates an independent copy and switches to it', async ({ page }) => {
|
||||
await mockModels(page)
|
||||
await seedChat(page, TWO_TURNS)
|
||||
await page.goto('/app/chat')
|
||||
|
||||
// Open the chats menu (Ctrl/Cmd+K) and duplicate the seeded chat.
|
||||
// Wait for the menu trigger to mount so its global keydown listener is armed
|
||||
// before we dispatch the shortcut.
|
||||
await page.getByTitle('Conversations (Ctrl/Cmd+K)').waitFor()
|
||||
await page.keyboard.press('Control+k')
|
||||
await page.getByTitle('Duplicate chat').first().click()
|
||||
|
||||
// A new active chat named "Seeded Chat (fork)" with the same 4 messages.
|
||||
await expect(page.locator('.chat-header-title')).toHaveText('Seeded Chat (fork)')
|
||||
await expect(page.locator('.chat-message-user')).toHaveCount(2)
|
||||
await expect(page.locator('.chat-message-assistant')).toHaveCount(2)
|
||||
})
|
||||
|
||||
async function mockCompletion(page, replyText) {
|
||||
await page.route('**/v1/chat/completions', (route) => {
|
||||
const sse =
|
||||
`data: ${JSON.stringify({ choices: [{ delta: { content: replyText } }] })}\n\n` +
|
||||
`data: ${JSON.stringify({ choices: [{ delta: {}, finish_reason: 'stop' }], usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 } })}\n\n` +
|
||||
`data: [DONE]\n\n`
|
||||
route.fulfill({ status: 200, contentType: 'text/event-stream', body: sse })
|
||||
})
|
||||
}
|
||||
|
||||
test('retry regenerates the first answer and drops the later turn', async ({ page }) => {
|
||||
await mockModels(page)
|
||||
// Capture the outbound request body so we can assert the model receives the
|
||||
// truncated history (not the stale downstream turns).
|
||||
let sentMessages = null
|
||||
await page.route('**/v1/chat/completions', (route) => {
|
||||
sentMessages = route.request().postDataJSON()?.messages || []
|
||||
const sse =
|
||||
`data: ${JSON.stringify({ choices: [{ delta: { content: 'REGENERATED first answer' } }] })}\n\n` +
|
||||
`data: ${JSON.stringify({ choices: [{ delta: {}, finish_reason: 'stop' }], usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 } })}\n\n` +
|
||||
`data: [DONE]\n\n`
|
||||
route.fulfill({ status: 200, contentType: 'text/event-stream', body: sse })
|
||||
})
|
||||
await seedChat(page, TWO_TURNS)
|
||||
await page.goto('/app/chat')
|
||||
|
||||
// Hover the FIRST assistant message and click its retry button.
|
||||
const firstAssistant = page.locator('.chat-message-assistant').first()
|
||||
await firstAssistant.hover()
|
||||
await firstAssistant.getByTitle('Regenerate').click()
|
||||
|
||||
// History is truncated to the first user turn, then the new answer streams in;
|
||||
// the second Q/A turn is gone.
|
||||
await expect(page.locator('.chat-message-assistant')).toContainText(['REGENERATED first answer'])
|
||||
await expect(page.locator('.chat-message-user')).toHaveCount(1)
|
||||
await expect(page.locator('.chat-message-assistant')).toHaveCount(1)
|
||||
|
||||
// The OUTBOUND payload must also be truncated: the resent user turn is present,
|
||||
// but the downstream turn and the stale first answer must be gone.
|
||||
const contents = (sentMessages || []).map(m =>
|
||||
typeof m.content === 'string' ? m.content : JSON.stringify(m.content)
|
||||
)
|
||||
expect(contents.join('\n')).toContain('first question')
|
||||
expect(contents.join('\n')).not.toContain('second question')
|
||||
expect(contents.join('\n')).not.toContain('first answer')
|
||||
})
|
||||
|
||||
test('copy chat puts the whole conversation on the clipboard', async ({ page, context }) => {
|
||||
await context.grantPermissions(['clipboard-read', 'clipboard-write'])
|
||||
await mockModels(page)
|
||||
await seedChat(page, TWO_TURNS)
|
||||
await page.goto('/app/chat')
|
||||
|
||||
// Wait for the menu trigger to mount so its global keydown listener is armed
|
||||
// before we dispatch the shortcut (same mount-race guard as the duplicate test).
|
||||
await page.getByTitle('Conversations (Ctrl/Cmd+K)').waitFor()
|
||||
await page.keyboard.press('Control+k')
|
||||
await page.getByTitle('Copy chat').first().click()
|
||||
|
||||
const clip = await page.evaluate(() => navigator.clipboard.readText())
|
||||
expect(clip).toContain('# Seeded Chat')
|
||||
expect(clip).toContain('first answer')
|
||||
expect(clip).toContain('second answer')
|
||||
})
|
||||
|
||||
test('branch from the first answer forks history up to that point', async ({ page }) => {
|
||||
await mockModels(page)
|
||||
await seedChat(page, TWO_TURNS)
|
||||
await page.goto('/app/chat')
|
||||
|
||||
const firstAssistant = page.locator('.chat-message-assistant').first()
|
||||
await firstAssistant.hover()
|
||||
await firstAssistant.getByTitle('Branch from here').click()
|
||||
|
||||
// New active chat "Seeded Chat (fork)" contains only the first Q/A turn.
|
||||
await expect(page.locator('.chat-header-title')).toHaveText('Seeded Chat (fork)')
|
||||
await expect(page.locator('.chat-message-user')).toHaveCount(1)
|
||||
await expect(page.locator('.chat-message-assistant')).toHaveCount(1)
|
||||
await expect(page.locator('.chat-message-assistant')).toContainText(['first answer'])
|
||||
})
|
||||
@@ -72,6 +72,7 @@
|
||||
"actions": {
|
||||
"copy": "Copy",
|
||||
"regenerate": "Regenerate",
|
||||
"branch": "Branch from here",
|
||||
"jumpToLatest": "Jump to latest"
|
||||
},
|
||||
"streaming": {
|
||||
@@ -100,7 +101,9 @@
|
||||
"toasts": {
|
||||
"selectModel": "Please select a model",
|
||||
"copied": "Copied to clipboard",
|
||||
"copyFailed": "Could not copy to clipboard"
|
||||
"copyFailed": "Could not copy to clipboard",
|
||||
"chatCopied": "Chat copied to clipboard",
|
||||
"forked": "Created a new chat"
|
||||
},
|
||||
"menu": {
|
||||
"trigger": "Chats",
|
||||
@@ -110,6 +113,8 @@
|
||||
"noMatch": "No conversations match your search",
|
||||
"noConversations": "No conversations yet",
|
||||
"rename": "Rename",
|
||||
"duplicate": "Duplicate chat",
|
||||
"copyChat": "Copy chat",
|
||||
"exportMarkdown": "Export as Markdown",
|
||||
"deleteChat": "Delete chat",
|
||||
"newChat": "New chat",
|
||||
|
||||
@@ -24,6 +24,8 @@ const ChatsMenu = forwardRef(function ChatsMenu({
|
||||
onDeleteAll,
|
||||
onRename,
|
||||
onExport,
|
||||
onCopyChat,
|
||||
onDuplicate,
|
||||
}, ref) {
|
||||
const { t } = useTranslation('chat')
|
||||
const [open, setOpen] = useState(false)
|
||||
@@ -230,6 +232,24 @@ const ChatsMenu = forwardRef(function ChatsMenu({
|
||||
>
|
||||
<i className="fas fa-pen" />
|
||||
</button>
|
||||
{onDuplicate && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={(e) => { e.stopPropagation(); onDuplicate(chat); setOpen(false) }}
|
||||
title={t('menu.duplicate')}
|
||||
>
|
||||
<i className="fas fa-clone" />
|
||||
</button>
|
||||
)}
|
||||
{(chat.history?.length || 0) > 0 && onCopyChat && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={(e) => { e.stopPropagation(); onCopyChat(chat) }}
|
||||
title={t('menu.copyChat')}
|
||||
>
|
||||
<i className="fas fa-clipboard" />
|
||||
</button>
|
||||
)}
|
||||
{(chat.history?.length || 0) > 0 && onExport && (
|
||||
<button
|
||||
type="button"
|
||||
|
||||
27
core/http/react-ui/src/hooks/useChat.js
vendored
27
core/http/react-ui/src/hooks/useChat.js
vendored
@@ -141,6 +141,24 @@ export function useChat(initialModel = '') {
|
||||
return chat
|
||||
}, [])
|
||||
|
||||
const forkChat = useCallback((chatId, uptoIndex) => {
|
||||
const src = chats.find(c => c.id === chatId)
|
||||
if (!src) return null
|
||||
const end = typeof uptoIndex === 'number' ? uptoIndex : src.history.length
|
||||
const forked = {
|
||||
...src,
|
||||
id: generateId(),
|
||||
name: `${src.name} (fork)`,
|
||||
history: structuredClone(src.history.slice(0, end)),
|
||||
tokenUsage: { prompt: 0, completion: 0, total: 0 },
|
||||
createdAt: Date.now(),
|
||||
updatedAt: Date.now(),
|
||||
}
|
||||
setChats(prev => [forked, ...prev])
|
||||
setActiveChatId(forked.id)
|
||||
return forked
|
||||
}, [chats])
|
||||
|
||||
const switchChat = useCallback((chatId) => {
|
||||
setActiveChatId(chatId)
|
||||
setStreamingContent('')
|
||||
@@ -260,8 +278,12 @@ export function useChat(initialModel = '') {
|
||||
if (chat?.systemPrompt) {
|
||||
messages.push({ role: 'system', content: chat.systemPrompt })
|
||||
}
|
||||
// Filter out thinking/reasoning/tool_call/tool_result messages
|
||||
const historyForApi = (chat?.history || []).filter(m =>
|
||||
// Filter out thinking/reasoning/tool_call/tool_result messages.
|
||||
// options.baseHistory lets callers (e.g. mid-conversation retry) pass the
|
||||
// intended truncated history synchronously; the closure `chat` still holds
|
||||
// the stale pre-truncation state because setChats only schedules an update.
|
||||
const baseHistory = options.baseHistory || chat?.history || []
|
||||
const historyForApi = baseHistory.filter(m =>
|
||||
m.role !== 'thinking' && m.role !== 'reasoning' && m.role !== 'tool_call' && m.role !== 'tool_result'
|
||||
)
|
||||
messages.push(...historyForApi, { role: 'user', content: messageContent })
|
||||
@@ -793,6 +815,7 @@ export function useChat(initialModel = '') {
|
||||
tokensPerSecond,
|
||||
maxTokensPerSecond,
|
||||
addChat,
|
||||
forkChat,
|
||||
switchChat,
|
||||
deleteChat,
|
||||
deleteAllChats,
|
||||
|
||||
@@ -33,7 +33,7 @@ function getLastMessagePreview(chat) {
|
||||
return ''
|
||||
}
|
||||
|
||||
function exportChatAsMarkdown(chat) {
|
||||
function serializeChatAsMarkdown(chat) {
|
||||
let md = `# ${chat.name}\n\n`
|
||||
md += `Model: ${chat.model || 'Unknown'}\n`
|
||||
md += `Date: ${new Date(chat.createdAt).toLocaleString()}\n\n---\n\n`
|
||||
@@ -47,7 +47,11 @@ function exportChatAsMarkdown(chat) {
|
||||
md += `<details><summary>Thinking</summary>\n\n${msg.content}\n\n</details>\n\n`
|
||||
}
|
||||
}
|
||||
const blob = new Blob([md], { type: 'text/markdown' })
|
||||
return md
|
||||
}
|
||||
|
||||
function downloadChatAsMarkdown(chat) {
|
||||
const blob = new Blob([serializeChatAsMarkdown(chat)], { type: 'text/markdown' })
|
||||
const url = URL.createObjectURL(blob)
|
||||
const a = document.createElement('a')
|
||||
a.href = url
|
||||
@@ -294,7 +298,7 @@ export default function Chat() {
|
||||
const {
|
||||
chats, activeChat, activeChatId, isStreaming, streamingChatId, streamingContent,
|
||||
streamingReasoning, streamingToolCalls, tokensPerSecond, maxTokensPerSecond,
|
||||
addChat, switchChat, deleteChat, deleteAllChats, renameChat, updateChatSettings,
|
||||
addChat, forkChat, switchChat, deleteChat, deleteAllChats, renameChat, updateChatSettings,
|
||||
sendMessage, stopGeneration, clearHistory, getContextUsagePercent, addMessage,
|
||||
} = useChat(urlModel || '')
|
||||
|
||||
@@ -795,34 +799,27 @@ export default function Chat() {
|
||||
await sendMessage(msg, files, mcpOptions)
|
||||
}, [input, files, activeChat, sendMessage, addToast, getToolsForLLM, isClientTool, executeTool, hasAppUI, getAppResource, getToolDefinition])
|
||||
|
||||
const handleRegenerate = useCallback(async () => {
|
||||
const handleRegenerate = useCallback(async (targetIndex) => {
|
||||
if (!activeChat || isStreaming) return
|
||||
const history = activeChat.history
|
||||
let lastUserMsg = null
|
||||
let lastUserFiles = null
|
||||
for (let i = history.length - 1; i >= 0; i--) {
|
||||
if (history[i].role === 'user') {
|
||||
lastUserMsg = typeof history[i].content === 'string' ? history[i].content : history[i].content?.[0]?.text || ''
|
||||
lastUserFiles = history[i].files || []
|
||||
break
|
||||
}
|
||||
const end = typeof targetIndex === 'number' ? targetIndex : history.length
|
||||
// Nearest user message at or before the target answer.
|
||||
let userIdx = -1
|
||||
for (let i = Math.min(end, history.length) - 1; i >= 0; i--) {
|
||||
if (history[i].role === 'user') { userIdx = i; break }
|
||||
}
|
||||
if (!lastUserMsg) return
|
||||
|
||||
// Remove everything after and including the last user message
|
||||
const newHistory = []
|
||||
let foundLastUser = false
|
||||
for (let i = history.length - 1; i >= 0; i--) {
|
||||
if (!foundLastUser && history[i].role === 'user') {
|
||||
foundLastUser = true
|
||||
continue
|
||||
}
|
||||
if (foundLastUser) {
|
||||
newHistory.unshift(history[i])
|
||||
}
|
||||
}
|
||||
updateChatSettings(activeChat.id, { history: newHistory })
|
||||
await sendMessage(lastUserMsg, lastUserFiles)
|
||||
if (userIdx === -1) return
|
||||
const userMsg = typeof history[userIdx].content === 'string'
|
||||
? history[userIdx].content
|
||||
: history[userIdx].content?.[0]?.text || ''
|
||||
const userFiles = history[userIdx].files || []
|
||||
// Drop the user turn and everything after it; sendMessage re-appends it.
|
||||
// Thread the truncated history through explicitly: updateChatSettings only
|
||||
// schedules a state update, so sendMessage's closure would otherwise read
|
||||
// the stale pre-truncation history for the outbound API payload.
|
||||
const baseHistory = history.slice(0, userIdx)
|
||||
updateChatSettings(activeChat.id, { history: baseHistory })
|
||||
await sendMessage(userMsg, userFiles, { baseHistory })
|
||||
}, [activeChat, isStreaming, sendMessage, updateChatSettings])
|
||||
|
||||
const handleKeyDown = (e) => {
|
||||
@@ -852,6 +849,11 @@ export default function Chat() {
|
||||
}
|
||||
}
|
||||
|
||||
const copyChatAsMarkdown = async (chat) => {
|
||||
const ok = await copyToClipboard(serializeChatAsMarkdown(chat))
|
||||
addToast(ok ? t('toasts.chatCopied') : t('toasts.copyFailed'), ok ? 'success' : 'error', ok ? 2000 : 3000)
|
||||
}
|
||||
|
||||
const contextPercent = getContextUsagePercent()
|
||||
|
||||
// Recent chats for the empty state — exclude the current chat and any
|
||||
@@ -892,7 +894,9 @@ export default function Chat() {
|
||||
onDelete={deleteChat}
|
||||
onDeleteAll={promptDeleteAll}
|
||||
onRename={renameChat}
|
||||
onExport={(chat) => exportChatAsMarkdown(chat)}
|
||||
onExport={(chat) => downloadChatAsMarkdown(chat)}
|
||||
onCopyChat={(chat) => copyChatAsMarkdown(chat)}
|
||||
onDuplicate={(chat) => { if (forkChat(chat.id)) addToast(t('toasts.forked'), 'success', 2000) }}
|
||||
/>
|
||||
{activeChat.localaiAssistant && (
|
||||
<span
|
||||
@@ -1184,11 +1188,19 @@ export default function Chat() {
|
||||
<button onClick={() => copyMessage(msg.content)} title={t('actions.copy')}>
|
||||
<i className="fas fa-copy" />
|
||||
</button>
|
||||
{msg.role === 'assistant' && i === activeChat.history.length - 1 && !isStreaming && (
|
||||
<button onClick={handleRegenerate} title={t('actions.regenerate')}>
|
||||
{msg.role === 'assistant' && !isStreaming && (
|
||||
<button onClick={() => handleRegenerate(i)} title={t('actions.regenerate')}>
|
||||
<i className="fas fa-rotate" />
|
||||
</button>
|
||||
)}
|
||||
{msg.role === 'assistant' && !isStreaming && (
|
||||
<button
|
||||
onClick={() => { forkChat(activeChat.id, i + 1); addToast(t('toasts.forked'), 'success', 2000) }}
|
||||
title={t('actions.branch')}
|
||||
>
|
||||
<i className="fas fa-code-branch" />
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user