mirror of
https://github.com/mudler/LocalAI.git
synced 2026-07-03 04:46:54 -04:00
Compare commits
8 Commits
worktree-i
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
715d4ed8e5 | ||
|
|
9fcc9c0d43 | ||
|
|
3c67b5b746 | ||
|
|
bea66fd84e | ||
|
|
f7a5dfd5ae | ||
|
|
6bcaf30c14 | ||
|
|
ef15b4bfda | ||
|
|
237bce48e8 |
@@ -1,5 +1,5 @@
|
||||
|
||||
IK_LLAMA_VERSION?=068b173649f2fd8dc96b35ada5a0b76d8985105d
|
||||
IK_LLAMA_VERSION?=87fc8701ff4da81a7d2a91ec0695f95eb3066a47
|
||||
LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=4fc4ec5541b243957ae5099edb67372f8f3b550e
|
||||
LLAMA_VERSION?=fdb1db877c526ec90f668eca1b858da5dba85560
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
# Local development: point at a working checkout instead of cloning, e.g.
|
||||
# make PRIVACY_FILTER_SRC=$HOME/c/privacy-filter.cpp grpc-server
|
||||
|
||||
PRIVACY_FILTER_VERSION?=595f59630c69d361b5196f2aba2c71c873d0c13c
|
||||
PRIVACY_FILTER_VERSION?=735a6c28607ee82afc3a670383f41b55266a3b9a
|
||||
PRIVACY_FILTER_REPO?=https://github.com/localai-org/privacy-filter.cpp
|
||||
PRIVACY_FILTER_SRC?=
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# CrispASR version (release tag)
|
||||
CRISPASR_REPO?=https://github.com/CrispStrobe/CrispASR
|
||||
CRISPASR_VERSION?=fcbc8718e654995e3bd2d0c98bcb8e55e297d23c
|
||||
CRISPASR_VERSION?=9a26976a8c8cf5af0afcdd04463cf8ba91e96a54
|
||||
SO_TARGET?=libgocrispasr.so
|
||||
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=3590aa8d626e671a1b1dc84506ea2932a243a480
|
||||
STABLEDIFFUSION_GGML_VERSION?=2574f5936571645f784b77623e1f09bad97d948a
|
||||
|
||||
CMAKE_ARGS+=-DGGML_MAX_NAME=128
|
||||
|
||||
|
||||
@@ -35,6 +35,21 @@ if [ "x${BUILD_PROFILE}" == "xcpu" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-best-match"
|
||||
fi
|
||||
|
||||
# AMD ROCm: vLLM ships prebuilt ROCm wheels, but on a DEDICATED index
|
||||
# (https://wheels.vllm.ai/rocm/), NOT PyPI, and ONLY for CPython 3.12. On any
|
||||
# other Python the installer silently falls back to the CUDA-only PyPI wheel,
|
||||
# which is unusable on an AMD GPU (import fails, so the backend never finds the
|
||||
# vllm module). Force Python 3.12 before the venv is created (matches the
|
||||
# intel/l4t13 cp312 bump); the hipblas branch below pulls vllm from the ROCm
|
||||
# wheel index. unsafe-best-match lets uv consult that index and PyPI together.
|
||||
# https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm
|
||||
if [ "x${BUILD_TYPE}" == "xhipblas" ]; then
|
||||
PYTHON_VERSION="3.12"
|
||||
PYTHON_PATCH="12"
|
||||
PY_STANDALONE_TAG="20251120"
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --index-strategy=unsafe-best-match"
|
||||
fi
|
||||
|
||||
# cublas13 pulls the vLLM wheel from a per-tag cu130 index (PyPI's vllm wheel
|
||||
# is built against CUDA 12 and won't load on cu130). uv's default per-package
|
||||
# first-match strategy would still pick the PyPI wheel, so allow it to consult
|
||||
@@ -104,7 +119,7 @@ if [ "$(uname -s)" = "Darwin" ]; then
|
||||
# can rewrite it. Darwin therefore follows vllm-metal and can lag the Linux
|
||||
# vllm pin (requirements-cublas13-after.txt, bumped independently against
|
||||
# vllm/vllm) until vllm-metal supports a newer vLLM.
|
||||
VLLM_METAL_VERSION="v0.3.0.dev20260701132215"
|
||||
VLLM_METAL_VERSION="v0.3.0.dev20260701212152"
|
||||
|
||||
# The coupled vLLM source version is whatever this vllm-metal release builds
|
||||
# against -- it declares it in its own installer as `vllm_v=`. Derive it from
|
||||
@@ -194,6 +209,22 @@ elif [ "x${BUILD_TYPE}" == "xintel" ]; then
|
||||
export CMAKE_PREFIX_PATH="$(python -c 'import site; print(site.getsitepackages()[0])'):${CMAKE_PREFIX_PATH:-}"
|
||||
VLLM_TARGET_DEVICE=xpu uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} --no-deps .
|
||||
popd
|
||||
# AMD ROCm: install vllm from its dedicated ROCm wheel index instead of the
|
||||
# CUDA-only PyPI wheel. installRequirements brings the base ROCm
|
||||
# torch/transformers (requirements-hipblas.txt), then we pull vllm (plus the
|
||||
# matching ROCm torch, via --upgrade) from wheels.vllm.ai/rocm. This is the
|
||||
# method upstream prescribes for AMD; the Python-3.12 pin is set above.
|
||||
# There is intentionally no requirements-hipblas-after.txt: a bare `vllm`
|
||||
# there would resolve to the CUDA wheel, and installRequirements never loads
|
||||
# a ${BUILD_TYPE}-after file for hipblas anyway (BUILD_TYPE == BUILD_PROFILE).
|
||||
# https://docs.vllm.ai/en/latest/getting_started/installation/gpu.html?device=rocm
|
||||
elif [ "x${BUILD_TYPE}" == "xhipblas" ]; then
|
||||
installRequirements
|
||||
|
||||
# --upgrade reconciles the base ROCm torch to whatever the vllm ROCm wheel
|
||||
# pins; --extra-index-url adds the ROCm wheel repository on top of PyPI.
|
||||
uv pip install ${EXTRA_PIP_INSTALL_FLAGS:-} \
|
||||
--extra-index-url https://wheels.vllm.ai/rocm/ --upgrade vllm
|
||||
# FROM_SOURCE=true on a CPU build skips the prebuilt vllm wheel in
|
||||
# requirements-cpu-after.txt and compiles vllm locally against the host's
|
||||
# actual CPU. Not used by default because it takes ~30-40 minutes, but
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
vllm
|
||||
@@ -157,33 +157,6 @@ var _ = Describe("X-LocalAI-Node ctx propagation contract", func() {
|
||||
stampViaRouterCtx()
|
||||
})
|
||||
|
||||
// Regression for #10636: a canceled request context must NOT cancel the
|
||||
// model LOAD. The heavy image/audio backends bind the load to the request
|
||||
// context so the routing holder reaches the SmartRouter; but a large
|
||||
// diffusers/LLM model on a slow (e.g. shared-memory iGPU) host can take
|
||||
// far longer to load than the client stays connected. If the request's
|
||||
// cancellation propagates to the load, the LoadModel RPC is aborted, the
|
||||
// backend process is torn down, and every retry restarts from scratch and
|
||||
// never converges. The load must instead run to completion and cache while
|
||||
// still carrying the request's routing holder value.
|
||||
It("ImageGeneration does not propagate request cancellation to the model load", func() {
|
||||
canceledCtx, cancel := context.WithCancel(reqCtx)
|
||||
cancel() // client disconnected while the (slow) load was still running
|
||||
|
||||
_, err := backend.ImageGeneration(canceledCtx, 64, 64, 1, 0, "p", "", "", "/tmp/out.png", loader, modelCfg, appCfg, nil)
|
||||
// The load reached the router (short-circuit sentinel), i.e. it was
|
||||
// NOT aborted early by the already-canceled request context.
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring("router short-circuit (test)"))
|
||||
|
||||
routerCtx := routerCtxOf()
|
||||
Expect(routerCtx).ToNot(BeNil(), "router callback must have been invoked")
|
||||
Expect(routerCtx.Err()).To(BeNil(),
|
||||
"a canceled request must not cancel the model load")
|
||||
// The routing holder value still propagates despite the decoupling.
|
||||
stampViaRouterCtx()
|
||||
})
|
||||
|
||||
It("does NOT leak the holder when the app context is used instead", func() {
|
||||
// Sanity: the bug being fixed manifests as the router getting
|
||||
// appCfg.Context (no holder) instead of reqCtx (holder). A direct
|
||||
|
||||
@@ -40,14 +40,10 @@ func (e *modelEmbedder) Embed(ctx context.Context, text string) ([]float32, erro
|
||||
|
||||
func ModelEmbedding(ctx context.Context, s string, tokens []int, loader *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
|
||||
|
||||
// model.WithContext carries the request context into the load so distributed
|
||||
// routing decisions reach the request's X-LocalAI-Node holder via
|
||||
// distributedhdr.Stamp. context.WithoutCancel keeps those values but drops
|
||||
// the request's cancellation, so a slow first load still completes and
|
||||
// caches if the client disconnects instead of aborting the LoadModel RPC and
|
||||
// tearing down the backend process (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
// model.WithContext(ctx) overrides the app-context default set in
|
||||
// ModelOptions so distributed routing decisions reach the request's
|
||||
// X-LocalAI-Node holder via distributedhdr.Stamp.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
|
||||
inferenceModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
|
||||
@@ -13,14 +13,10 @@ import (
|
||||
|
||||
func ImageGeneration(ctx context.Context, height, width, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig, refImages []string) (func() error, error) {
|
||||
|
||||
// model.WithContext carries the request context into the load so distributed
|
||||
// routing decisions reach the request's X-LocalAI-Node holder via
|
||||
// distributedhdr.Stamp. context.WithoutCancel keeps those values but drops
|
||||
// the request's cancellation, so a slow first load still completes and
|
||||
// caches if the client disconnects instead of aborting the LoadModel RPC and
|
||||
// tearing down the backend process (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
// model.WithContext(ctx) overrides the app-context default set in
|
||||
// ModelOptions so distributed routing decisions reach the request's
|
||||
// X-LocalAI-Node holder via distributedhdr.Stamp.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
inferenceModel, err := loader.Load(
|
||||
opts...,
|
||||
)
|
||||
|
||||
@@ -111,12 +111,7 @@ func ModelInference(ctx context.Context, s string, messages schema.Messages, ima
|
||||
}
|
||||
ctx = distributedhdr.MaybeWithPrefixChain(ctx, c.ModelID(), chainSource)
|
||||
|
||||
// context.WithoutCancel decouples the model load from the request's
|
||||
// cancellation while preserving its routing values, so a slow load still
|
||||
// completes and caches if the client disconnects instead of aborting the
|
||||
// LoadModel RPC mid-load (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(*c, o, model.WithContext(context.WithoutCancel(ctx)))
|
||||
opts := ModelOptions(*c, o, model.WithContext(ctx))
|
||||
inferenceModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
recordModelLoadFailure(o, c.Name, c.Backend, err, map[string]any{"model_file": modelFile})
|
||||
|
||||
@@ -57,14 +57,10 @@ func (r *modelReranker) Rerank(ctx context.Context, query string, documents []st
|
||||
}
|
||||
|
||||
func Rerank(ctx context.Context, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, modelConfig config.ModelConfig) (*proto.RerankResult, error) {
|
||||
// model.WithContext carries the request context into the load so distributed
|
||||
// routing decisions reach the request's X-LocalAI-Node holder via
|
||||
// distributedhdr.Stamp. context.WithoutCancel keeps those values but drops
|
||||
// the request's cancellation, so a slow first load still completes and
|
||||
// caches if the client disconnects instead of aborting the LoadModel RPC and
|
||||
// tearing down the backend process (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
// model.WithContext(ctx) overrides the app-context default set in
|
||||
// ModelOptions so distributed routing decisions reach the request's
|
||||
// X-LocalAI-Node holder via distributedhdr.Stamp.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
rerankModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
|
||||
|
||||
@@ -45,14 +45,10 @@ func loadTranscriptionModel(ctx context.Context, ml *model.ModelLoader, modelCon
|
||||
if modelConfig.Backend == "" {
|
||||
modelConfig.Backend = model.WhisperBackend
|
||||
}
|
||||
// model.WithContext carries the request context into the load so distributed
|
||||
// routing decisions reach the request's X-LocalAI-Node holder via
|
||||
// distributedhdr.Stamp. context.WithoutCancel keeps those values but drops
|
||||
// the request's cancellation, so a slow first load still completes and
|
||||
// caches if the client disconnects instead of aborting the LoadModel RPC and
|
||||
// tearing down the backend process (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
// model.WithContext(ctx) overrides the app-context default set in
|
||||
// ModelOptions so distributed routing decisions reach the request's
|
||||
// X-LocalAI-Node holder via distributedhdr.Stamp.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
transcriptionModel, err := ml.Load(opts...)
|
||||
if err != nil {
|
||||
recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
|
||||
|
||||
@@ -50,14 +50,10 @@ func ModelTTS(
|
||||
appConfig *config.ApplicationConfig,
|
||||
modelConfig config.ModelConfig,
|
||||
) (string, *proto.Result, error) {
|
||||
// model.WithContext carries the request context into the load so distributed
|
||||
// routing decisions reach the request's X-LocalAI-Node holder via
|
||||
// distributedhdr.Stamp. context.WithoutCancel keeps those values but drops
|
||||
// the request's cancellation, so a slow first load still completes and
|
||||
// caches if the client disconnects instead of aborting the LoadModel RPC and
|
||||
// tearing down the backend process (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
// model.WithContext(ctx) overrides the app-context default set in
|
||||
// ModelOptions so distributed routing decisions reach the request's
|
||||
// X-LocalAI-Node holder via distributedhdr.Stamp.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
ttsModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
|
||||
@@ -157,9 +153,7 @@ func ModelTTSStream(
|
||||
modelConfig config.ModelConfig,
|
||||
audioCallback func([]byte) error,
|
||||
) error {
|
||||
// See ModelTTS above: WithoutCancel decouples the load from request
|
||||
// cancellation while preserving routing values (issue #10636).
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
ttsModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
|
||||
|
||||
@@ -14,14 +14,10 @@ func VAD(request *schema.VADRequest,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
modelConfig config.ModelConfig) (*schema.VADResponse, error) {
|
||||
// model.WithContext carries the request context into the load so distributed
|
||||
// routing decisions reach the request's X-LocalAI-Node holder via
|
||||
// distributedhdr.Stamp. context.WithoutCancel keeps those values but drops
|
||||
// the request's cancellation, so a slow first load still completes and
|
||||
// caches if the client disconnects instead of aborting the LoadModel RPC and
|
||||
// tearing down the backend process (issue #10636). Inference below keeps the
|
||||
// cancellable ctx, so a disconnect still stops generation.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(context.WithoutCancel(ctx)))
|
||||
// model.WithContext(ctx) overrides the app-context default set in
|
||||
// ModelOptions so distributed routing decisions reach the request's
|
||||
// X-LocalAI-Node holder via distributedhdr.Stamp.
|
||||
opts := ModelOptions(modelConfig, appConfig, model.WithContext(ctx))
|
||||
vadModel, err := ml.Load(opts...)
|
||||
if err != nil {
|
||||
recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
|
||||
|
||||
133
core/http/react-ui/e2e/forking-chat.spec.js
Normal file
133
core/http/react-ui/e2e/forking-chat.spec.js
Normal file
@@ -0,0 +1,133 @@
|
||||
import { test, expect } from './coverage-fixtures.js'
|
||||
|
||||
/**
 * Seeds a single chat into localStorage so the specs don't need a live model.
 *
 * Registers a Playwright init script that writes one chat (id `seed1`,
 * name `Seeded Chat`, model `test-model`) holding `history` under the
 * `localai_chats_data` key and marks it as the active chat.
 *
 * @param {object} page - Playwright page the init script is registered on.
 * @param {Array<{role: string, content: string}>} history - Messages stored
 *   as the seeded chat's history.
 * @returns {Promise<void>}
 */
async function seedChat(page, history) {
  // The callback only touches its own argument and `localStorage`; `history`
  // is handed over as the init-script argument rather than closed over.
  await page.addInitScript((h) => {
    // One timestamp so createdAt, updatedAt and lastSaved always agree.
    const now = Date.now()
    const chat = {
      id: 'seed1', name: 'Seeded Chat', model: 'test-model',
      history: h, systemPrompt: '', mcpMode: false, mcpServers: [],
      clientMCPServers: [], temperature: null, topP: null, topK: null,
      tokenUsage: { prompt: 0, completion: 0, total: 0 },
      contextSize: null, createdAt: now, updatedAt: now,
    }
    localStorage.setItem('localai_chats_data', JSON.stringify({
      chats: [chat], activeChatId: 'seed1', lastSaved: now,
    }))
  }, history)
}
|
||||
|
||||
/**
 * Stubs the two API endpoints these specs mock so no backend is needed:
 * the model capability list (a single chat-capable `test-model`) and the
 * operations list (empty).
 *
 * @param {object} page - Playwright page to install the routes on.
 * @returns {Promise<void>}
 */
async function mockModels(page) {
  // Builds a route handler that answers every request with `payload` as JSON.
  const respondWithJson = (payload) => (route) => route.fulfill({
    status: 200,
    contentType: 'application/json',
    body: JSON.stringify(payload),
  })

  await page.route(
    '**/api/models/capabilities',
    respondWithJson({ data: [{ id: 'test-model', capabilities: ['FLAG_CHAT'] }] }),
  )
  await page.route('**/api/operations', respondWithJson({ operations: [] }))
}
|
||||
|
||||
// Fixture history: two complete user/assistant turns (four messages).
// The specs below rely on the exact contents to tell the turns apart.
const TWO_TURNS = [
  { role: 'user', content: 'first question' },
  { role: 'assistant', content: 'first answer' },
  { role: 'user', content: 'second question' },
  { role: 'assistant', content: 'second answer' },
]
|
||||
|
||||
// Duplicating a chat from the conversations menu must create a fork that
// becomes the active chat and carries the full seeded history.
test('duplicate creates an independent copy and switches to it', async ({ page }) => {
  await mockModels(page)
  await seedChat(page, TWO_TURNS)
  await page.goto('/app/chat')

  // Open the chats menu (Ctrl/Cmd+K) and duplicate the seeded chat.
  // Wait for the menu trigger to mount so its global keydown listener is armed
  // before we dispatch the shortcut.
  await page.getByTitle('Conversations (Ctrl/Cmd+K)').waitFor()
  await page.keyboard.press('Control+k')
  await page.getByTitle('Duplicate chat').first().click()

  // A new active chat named "Seeded Chat (fork)" with the same 4 messages.
  await expect(page.locator('.chat-header-title')).toHaveText('Seeded Chat (fork)')
  await expect(page.locator('.chat-message-user')).toHaveCount(2)
  await expect(page.locator('.chat-message-assistant')).toHaveCount(2)
})
|
||||
|
||||
/**
 * Stubs the chat-completions endpoint with a canned SSE stream: one content
 * delta carrying `replyText`, one `finish_reason: 'stop'` chunk with usage
 * counts, then the `[DONE]` terminator.
 *
 * @param {object} page - Playwright page to install the route on.
 * @param {string} replyText - Text delivered as the assistant's reply.
 * @returns {Promise<void>}
 */
async function mockCompletion(page, replyText) {
  const events = [
    JSON.stringify({ choices: [{ delta: { content: replyText } }] }),
    JSON.stringify({ choices: [{ delta: {}, finish_reason: 'stop' }], usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 } }),
    '[DONE]',
  ]
  const sse = events.map((payload) => `data: ${payload}\n\n`).join('')

  // Return the fulfill promise so a failure inside it is not left floating.
  await page.route('**/v1/chat/completions', (route) =>
    route.fulfill({ status: 200, contentType: 'text/event-stream', body: sse }))
}
|
||||
|
||||
// Regenerating the FIRST answer must truncate the conversation to the first
// user turn, both in the rendered chat and in the request sent to the model.
test('retry regenerates the first answer and drops the later turn', async ({ page }) => {
  await mockModels(page)
  // Capture the outbound request body so we can assert the model receives the
  // truncated history (not the stale downstream turns).
  let sentMessages = null
  await page.route('**/v1/chat/completions', (route) => {
    sentMessages = route.request().postDataJSON()?.messages || []
    const sse =
      `data: ${JSON.stringify({ choices: [{ delta: { content: 'REGENERATED first answer' } }] })}\n\n` +
      `data: ${JSON.stringify({ choices: [{ delta: {}, finish_reason: 'stop' }], usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 } })}\n\n` +
      `data: [DONE]\n\n`
    // Return the fulfill promise so a failure inside it is not left floating.
    return route.fulfill({ status: 200, contentType: 'text/event-stream', body: sse })
  })
  await seedChat(page, TWO_TURNS)
  await page.goto('/app/chat')

  // Hover the FIRST assistant message and click its retry button.
  const firstAssistant = page.locator('.chat-message-assistant').first()
  await firstAssistant.hover()
  await firstAssistant.getByTitle('Regenerate').click()

  // History is truncated to the first user turn, then the new answer streams in;
  // the second Q/A turn is gone.
  await expect(page.locator('.chat-message-assistant')).toContainText(['REGENERATED first answer'])
  await expect(page.locator('.chat-message-user')).toHaveCount(1)
  await expect(page.locator('.chat-message-assistant')).toHaveCount(1)

  // The OUTBOUND payload must also be truncated: the resent user turn is present,
  // but the downstream turn and the stale first answer must be gone.
  const contents = (sentMessages || []).map(m =>
    typeof m.content === 'string' ? m.content : JSON.stringify(m.content)
  )
  const outbound = contents.join('\n')
  expect(outbound).toContain('first question')
  expect(outbound).not.toContain('second question')
  expect(outbound).not.toContain('first answer')
})
|
||||
|
||||
// "Copy chat" in the conversations menu must place the whole conversation,
// serialized as Markdown, on the clipboard.
test('copy chat puts the whole conversation on the clipboard', async ({ page, context }) => {
  await context.grantPermissions(['clipboard-read', 'clipboard-write'])
  await mockModels(page)
  await seedChat(page, TWO_TURNS)
  await page.goto('/app/chat')

  // Wait for the menu trigger to mount so its global keydown listener is armed
  // before we dispatch the shortcut (same mount-race guard as the duplicate test).
  await page.getByTitle('Conversations (Ctrl/Cmd+K)').waitFor()
  await page.keyboard.press('Control+k')
  await page.getByTitle('Copy chat').first().click()

  // The app writes to the clipboard asynchronously after the click, so poll
  // until the copied text shows up instead of reading once and racing it.
  const readClipboard = () => page.evaluate(() => navigator.clipboard.readText())
  await expect.poll(readClipboard).toContain('# Seeded Chat')

  const clip = await readClipboard()
  expect(clip).toContain('first answer')
  expect(clip).toContain('second answer')
})
|
||||
|
||||
// Branching from an assistant message must fork the chat with the history up
// to and including that message, and switch to the fork.
test('branch from the first answer forks history up to that point', async ({ page }) => {
  await mockModels(page)
  await seedChat(page, TWO_TURNS)
  await page.goto('/app/chat')

  // Hover the FIRST assistant message and click its branch button.
  const firstAssistant = page.locator('.chat-message-assistant').first()
  await firstAssistant.hover()
  await firstAssistant.getByTitle('Branch from here').click()

  // New active chat "Seeded Chat (fork)" contains only the first Q/A turn.
  await expect(page.locator('.chat-header-title')).toHaveText('Seeded Chat (fork)')
  await expect(page.locator('.chat-message-user')).toHaveCount(1)
  await expect(page.locator('.chat-message-assistant')).toHaveCount(1)
  await expect(page.locator('.chat-message-assistant')).toContainText(['first answer'])
})
|
||||
@@ -72,6 +72,7 @@
|
||||
"actions": {
|
||||
"copy": "Copy",
|
||||
"regenerate": "Regenerate",
|
||||
"branch": "Branch from here",
|
||||
"jumpToLatest": "Jump to latest"
|
||||
},
|
||||
"streaming": {
|
||||
@@ -100,7 +101,9 @@
|
||||
"toasts": {
|
||||
"selectModel": "Please select a model",
|
||||
"copied": "Copied to clipboard",
|
||||
"copyFailed": "Could not copy to clipboard"
|
||||
"copyFailed": "Could not copy to clipboard",
|
||||
"chatCopied": "Chat copied to clipboard",
|
||||
"forked": "Created a new chat"
|
||||
},
|
||||
"menu": {
|
||||
"trigger": "Chats",
|
||||
@@ -110,6 +113,8 @@
|
||||
"noMatch": "No conversations match your search",
|
||||
"noConversations": "No conversations yet",
|
||||
"rename": "Rename",
|
||||
"duplicate": "Duplicate chat",
|
||||
"copyChat": "Copy chat",
|
||||
"exportMarkdown": "Export as Markdown",
|
||||
"deleteChat": "Delete chat",
|
||||
"newChat": "New chat",
|
||||
|
||||
@@ -24,6 +24,8 @@ const ChatsMenu = forwardRef(function ChatsMenu({
|
||||
onDeleteAll,
|
||||
onRename,
|
||||
onExport,
|
||||
onCopyChat,
|
||||
onDuplicate,
|
||||
}, ref) {
|
||||
const { t } = useTranslation('chat')
|
||||
const [open, setOpen] = useState(false)
|
||||
@@ -230,6 +232,24 @@ const ChatsMenu = forwardRef(function ChatsMenu({
|
||||
>
|
||||
<i className="fas fa-pen" />
|
||||
</button>
|
||||
{onDuplicate && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={(e) => { e.stopPropagation(); onDuplicate(chat); setOpen(false) }}
|
||||
title={t('menu.duplicate')}
|
||||
>
|
||||
<i className="fas fa-clone" />
|
||||
</button>
|
||||
)}
|
||||
{(chat.history?.length || 0) > 0 && onCopyChat && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={(e) => { e.stopPropagation(); onCopyChat(chat) }}
|
||||
title={t('menu.copyChat')}
|
||||
>
|
||||
<i className="fas fa-clipboard" />
|
||||
</button>
|
||||
)}
|
||||
{(chat.history?.length || 0) > 0 && onExport && (
|
||||
<button
|
||||
type="button"
|
||||
|
||||
27
core/http/react-ui/src/hooks/useChat.js
vendored
27
core/http/react-ui/src/hooks/useChat.js
vendored
@@ -141,6 +141,24 @@ export function useChat(initialModel = '') {
|
||||
return chat
|
||||
}, [])
|
||||
|
||||
// Creates a copy of the chat `chatId`, named "<name> (fork)", prepends it to
// the chat list and makes it the active chat.
//
// chatId    - id of the chat to copy.
// uptoIndex - optional number; when given, only history[0, uptoIndex) is
//             copied, otherwise the whole history.
//
// Returns the new chat object, or null when no chat has that id.
const forkChat = useCallback((chatId, uptoIndex) => {
  const src = chats.find(c => c.id === chatId)
  if (!src) return null
  // Tolerate a chat without a history array instead of throwing on `.length`.
  const srcHistory = src.history || []
  const end = typeof uptoIndex === 'number' ? uptoIndex : srcHistory.length
  // One timestamp so createdAt and updatedAt always agree on a fresh fork.
  const now = Date.now()
  const forked = {
    ...src,
    id: generateId(),
    name: `${src.name} (fork)`,
    // Deep copy, so the fork never shares message objects with the source.
    history: structuredClone(srcHistory.slice(0, end)),
    tokenUsage: { prompt: 0, completion: 0, total: 0 },
    createdAt: now,
    updatedAt: now,
  }
  setChats(prev => [forked, ...prev])
  setActiveChatId(forked.id)
  return forked
}, [chats])
|
||||
|
||||
const switchChat = useCallback((chatId) => {
|
||||
setActiveChatId(chatId)
|
||||
setStreamingContent('')
|
||||
@@ -260,8 +278,12 @@ export function useChat(initialModel = '') {
|
||||
if (chat?.systemPrompt) {
|
||||
messages.push({ role: 'system', content: chat.systemPrompt })
|
||||
}
|
||||
// Filter out thinking/reasoning/tool_call/tool_result messages
|
||||
const historyForApi = (chat?.history || []).filter(m =>
|
||||
// Filter out thinking/reasoning/tool_call/tool_result messages.
|
||||
// options.baseHistory lets callers (e.g. mid-conversation retry) pass the
|
||||
// intended truncated history synchronously; the closure `chat` still holds
|
||||
// the stale pre-truncation state because setChats only schedules an update.
|
||||
const baseHistory = options.baseHistory || chat?.history || []
|
||||
const historyForApi = baseHistory.filter(m =>
|
||||
m.role !== 'thinking' && m.role !== 'reasoning' && m.role !== 'tool_call' && m.role !== 'tool_result'
|
||||
)
|
||||
messages.push(...historyForApi, { role: 'user', content: messageContent })
|
||||
@@ -793,6 +815,7 @@ export function useChat(initialModel = '') {
|
||||
tokensPerSecond,
|
||||
maxTokensPerSecond,
|
||||
addChat,
|
||||
forkChat,
|
||||
switchChat,
|
||||
deleteChat,
|
||||
deleteAllChats,
|
||||
|
||||
@@ -33,7 +33,7 @@ function getLastMessagePreview(chat) {
|
||||
return ''
|
||||
}
|
||||
|
||||
function exportChatAsMarkdown(chat) {
|
||||
function serializeChatAsMarkdown(chat) {
|
||||
let md = `# ${chat.name}\n\n`
|
||||
md += `Model: ${chat.model || 'Unknown'}\n`
|
||||
md += `Date: ${new Date(chat.createdAt).toLocaleString()}\n\n---\n\n`
|
||||
@@ -47,7 +47,11 @@ function exportChatAsMarkdown(chat) {
|
||||
md += `<details><summary>Thinking</summary>\n\n${msg.content}\n\n</details>\n\n`
|
||||
}
|
||||
}
|
||||
const blob = new Blob([md], { type: 'text/markdown' })
|
||||
return md
|
||||
}
|
||||
|
||||
function downloadChatAsMarkdown(chat) {
|
||||
const blob = new Blob([serializeChatAsMarkdown(chat)], { type: 'text/markdown' })
|
||||
const url = URL.createObjectURL(blob)
|
||||
const a = document.createElement('a')
|
||||
a.href = url
|
||||
@@ -294,7 +298,7 @@ export default function Chat() {
|
||||
const {
|
||||
chats, activeChat, activeChatId, isStreaming, streamingChatId, streamingContent,
|
||||
streamingReasoning, streamingToolCalls, tokensPerSecond, maxTokensPerSecond,
|
||||
addChat, switchChat, deleteChat, deleteAllChats, renameChat, updateChatSettings,
|
||||
addChat, forkChat, switchChat, deleteChat, deleteAllChats, renameChat, updateChatSettings,
|
||||
sendMessage, stopGeneration, clearHistory, getContextUsagePercent, addMessage,
|
||||
} = useChat(urlModel || '')
|
||||
|
||||
@@ -795,34 +799,27 @@ export default function Chat() {
|
||||
await sendMessage(msg, files, mcpOptions)
|
||||
}, [input, files, activeChat, sendMessage, addToast, getToolsForLLM, isClientTool, executeTool, hasAppUI, getAppResource, getToolDefinition])
|
||||
|
||||
const handleRegenerate = useCallback(async () => {
|
||||
const handleRegenerate = useCallback(async (targetIndex) => {
|
||||
if (!activeChat || isStreaming) return
|
||||
const history = activeChat.history
|
||||
let lastUserMsg = null
|
||||
let lastUserFiles = null
|
||||
for (let i = history.length - 1; i >= 0; i--) {
|
||||
if (history[i].role === 'user') {
|
||||
lastUserMsg = typeof history[i].content === 'string' ? history[i].content : history[i].content?.[0]?.text || ''
|
||||
lastUserFiles = history[i].files || []
|
||||
break
|
||||
}
|
||||
const end = typeof targetIndex === 'number' ? targetIndex : history.length
|
||||
// Nearest user message at or before the target answer.
|
||||
let userIdx = -1
|
||||
for (let i = Math.min(end, history.length) - 1; i >= 0; i--) {
|
||||
if (history[i].role === 'user') { userIdx = i; break }
|
||||
}
|
||||
if (!lastUserMsg) return
|
||||
|
||||
// Remove everything after and including the last user message
|
||||
const newHistory = []
|
||||
let foundLastUser = false
|
||||
for (let i = history.length - 1; i >= 0; i--) {
|
||||
if (!foundLastUser && history[i].role === 'user') {
|
||||
foundLastUser = true
|
||||
continue
|
||||
}
|
||||
if (foundLastUser) {
|
||||
newHistory.unshift(history[i])
|
||||
}
|
||||
}
|
||||
updateChatSettings(activeChat.id, { history: newHistory })
|
||||
await sendMessage(lastUserMsg, lastUserFiles)
|
||||
if (userIdx === -1) return
|
||||
const userMsg = typeof history[userIdx].content === 'string'
|
||||
? history[userIdx].content
|
||||
: history[userIdx].content?.[0]?.text || ''
|
||||
const userFiles = history[userIdx].files || []
|
||||
// Drop the user turn and everything after it; sendMessage re-appends it.
|
||||
// Thread the truncated history through explicitly: updateChatSettings only
|
||||
// schedules a state update, so sendMessage's closure would otherwise read
|
||||
// the stale pre-truncation history for the outbound API payload.
|
||||
const baseHistory = history.slice(0, userIdx)
|
||||
updateChatSettings(activeChat.id, { history: baseHistory })
|
||||
await sendMessage(userMsg, userFiles, { baseHistory })
|
||||
}, [activeChat, isStreaming, sendMessage, updateChatSettings])
|
||||
|
||||
const handleKeyDown = (e) => {
|
||||
@@ -852,6 +849,11 @@ export default function Chat() {
|
||||
}
|
||||
}
|
||||
|
||||
// Copies the Markdown serialization of `chat` to the clipboard and shows a
// toast: success for 2000 ms when the copy worked, error for 3000 ms otherwise.
const copyChatAsMarkdown = async (chat) => {
  const ok = await copyToClipboard(serializeChatAsMarkdown(chat))
  if (ok) {
    addToast(t('toasts.chatCopied'), 'success', 2000)
  } else {
    addToast(t('toasts.copyFailed'), 'error', 3000)
  }
}
|
||||
|
||||
const contextPercent = getContextUsagePercent()
|
||||
|
||||
// Recent chats for the empty state — exclude the current chat and any
|
||||
@@ -892,7 +894,9 @@ export default function Chat() {
|
||||
onDelete={deleteChat}
|
||||
onDeleteAll={promptDeleteAll}
|
||||
onRename={renameChat}
|
||||
onExport={(chat) => exportChatAsMarkdown(chat)}
|
||||
onExport={(chat) => downloadChatAsMarkdown(chat)}
|
||||
onCopyChat={(chat) => copyChatAsMarkdown(chat)}
|
||||
onDuplicate={(chat) => { if (forkChat(chat.id)) addToast(t('toasts.forked'), 'success', 2000) }}
|
||||
/>
|
||||
{activeChat.localaiAssistant && (
|
||||
<span
|
||||
@@ -1184,11 +1188,19 @@ export default function Chat() {
|
||||
<button onClick={() => copyMessage(msg.content)} title={t('actions.copy')}>
|
||||
<i className="fas fa-copy" />
|
||||
</button>
|
||||
{msg.role === 'assistant' && i === activeChat.history.length - 1 && !isStreaming && (
|
||||
<button onClick={handleRegenerate} title={t('actions.regenerate')}>
|
||||
{msg.role === 'assistant' && !isStreaming && (
|
||||
<button onClick={() => handleRegenerate(i)} title={t('actions.regenerate')}>
|
||||
<i className="fas fa-rotate" />
|
||||
</button>
|
||||
)}
|
||||
{msg.role === 'assistant' && !isStreaming && (
|
||||
<button
|
||||
onClick={() => { forkChat(activeChat.id, i + 1); addToast(t('toasts.forked'), 'success', 2000) }}
|
||||
title={t('actions.branch')}
|
||||
>
|
||||
<i className="fas fa-code-branch" />
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user