fix(server): stream the backup download so large libraries don't time out (#3005)

* fix(server): stream the backup download so large libraries don't time out

Part of issue #2987 ("Get backup" reportedly never completes on a
Pi 3B+): the settings download built the whole tar.gz on disk before
sending the first byte. tar+gzip at the default level 9 measures
98 s for 355 MB on a Pi 4 (~3.6 MB/s) — a multi-GB library on a Pi 3
sends nothing for longer than a browser keeps a byte-less request
alive, so the download always aborted.

- settings_backup now streams the archive while it is built
  (StreamingHttpResponse over a pipe fed by a tarfile producer
  thread): first bytes hit the wire immediately, nothing is staged
  on the SD card, and a client disconnect stops the producer.
- gzip level drops to 1 for both the streamed and the API
  (create_backup) paths — backups are mostly already-compressed
  media, so level 9 burned minutes of CPU for ~no size win.
- The Content-Disposition filename is RFC-8187-escaped via Django's
  content_disposition_header (player_name is operator-controlled).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* fix(server): surface backup-stream producer failures, fix mypy type

- stream_backup() re-raises a producer failure once the pipe drains,
  so the response aborts mid-transfer instead of completing 200 with
  a silently truncated archive (review feedback). Client disconnects
  still log-and-stop without morphing into spurious errors.
- Return type is Generator (not Iterator) — the disconnect test
  calls .close(), which mypy rejects on Iterator.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Viktor Petersson
2026-06-07 08:00:28 +02:00
committed by GitHub
parent 10c68b26cc
commit da53e045a6
3 changed files with 182 additions and 23 deletions

View File

@@ -10,12 +10,20 @@ from urllib.parse import urlparse, urlunparse
from django.contrib import messages
from django.contrib.auth import authenticate
from django.contrib.auth import login as django_login
from django.http import FileResponse, HttpRequest, HttpResponse
from django.http import (
FileResponse,
HttpRequest,
HttpResponse,
StreamingHttpResponse,
)
from django.http.response import HttpResponseBase
from django.shortcuts import redirect
from django.urls import reverse
from django.utils import timezone
from django.utils.http import url_has_allowed_host_and_scheme
from django.utils.http import (
content_disposition_header,
url_has_allowed_host_and_scheme,
)
from django.views.decorators.http import require_http_methods
from anthias_server.app import page_context
@@ -966,26 +974,31 @@ def settings_save(request: HttpRequest) -> HttpResponse:
@authorized
@require_http_methods(['POST'])
def settings_backup(request: HttpRequest) -> HttpResponseBase:
"""Same as api.views.mixins.BackupViewMixin.post but streams the
archive back inline instead of returning the filename + relying
on a follow-up /static_with_mime/ fetch."""
from os import getenv
"""Stream the backup archive as it is being built.
filename = backup_helper.create_backup(name=settings['player_name'])
# backup_helper.create_backup writes to $HOME/anthias/staticfiles/.
# Reach for the same base here so we serve the archive from the
# path it was actually written to. The pre-fix path.join('static',
# filename) was relative to CWD and would FileNotFoundError in
# production where uvicorn runs out of /usr/src/app.
archive_path = path.join(
getenv('HOME') or '', 'anthias/staticfiles', filename
)
response = FileResponse(
open(archive_path, 'rb'),
as_attachment=True,
filename=filename,
The pre-#2987 shape — create_backup() to a staging file, then a
FileResponse — sent zero response bytes while tar+gzip ran, and
that build takes minutes on an SBC with a real content library.
Browsers abort a request that has produced no bytes for ~5
minutes, so on devices like the reporter's Pi 3 "Get backup"
spun and then silently failed every time. stream_backup() puts
the first tar block on the wire immediately and keeps bytes
flowing for the whole build (and needs no staging space on the
SD card). The iterator is synchronous — Django adapts it under
ASGI via its thread executor, which is fine for an operation
this rare.
"""
filename = backup_helper.backup_archive_name(settings['player_name'])
response = StreamingHttpResponse(
backup_helper.stream_backup(),
content_type='application/x-tgz',
)
# content_disposition_header() RFC-8187-escapes the filename —
# player_name is operator-controlled, so it must not be able to
# smuggle quotes/control chars into the header.
response['Content-Disposition'] = content_disposition_header(
as_attachment=True, filename=filename
)
return response

View File

@@ -2,6 +2,8 @@ import logging
import os
import sys
import tarfile
import threading
from collections.abc import Generator
from datetime import datetime
from os import getenv, makedirs, path, remove
from typing import Any
@@ -53,12 +55,90 @@ class BackupRecoverError(Exception):
"""Raised when a backup archive cannot be safely recovered."""
def create_backup(name: str = default_archive_name) -> str:
home = getenv('HOME') or ''
archive_name = '{}-{}.tar.gz'.format(
# gzip level for backup archives. The bulk of a backup is video/image
# assets that are already compressed, so the default level 9 burns
# minutes of single-core CPU on an SBC for ~no size win — measured
# 98 s for 355 MB on a Pi 4 (~3.6 MB/s); a multi-GB library on a Pi 3
# runs well past the browser's request timeout (issue #2987, the
# "Get backup never downloads" report). Level 1 is ~4-5x faster and
# within a couple of percent on size for this content mix.
BACKUP_COMPRESSLEVEL = 1
# Chunk size for stream_backup(). 64 KiB matches the pipe capacity on
# Linux so the tar producer thread and the HTTP writer interleave
# without either side stalling on tiny reads.
_STREAM_CHUNK_BYTES = 64 * 1024
def backup_archive_name(name: str = default_archive_name) -> str:
return '{}-{}.tar.gz'.format(
name if name else default_archive_name,
datetime.now().strftime('%Y-%m-%dT%H-%M-%S'),
)
def stream_backup() -> Generator[bytes, None, None]:
"""Yield a backup tar.gz as it is being built.
The download path used to write the whole archive to disk before
sending the first byte. tar+gzip of a multi-GB asset library takes
minutes on an SBC, and a browser kills a request that has produced
no response bytes for ~5 minutes — so "Get backup" simply never
completed on devices with a real content library (issue #2987).
Streaming starts the response immediately, keeps bytes flowing for
the whole build, and as a bonus never needs staging space on the
(often nearly full) SD card.
A producer thread feeds ``tarfile`` through a pipe; the generator
reads the other end. A consumer that disconnects mid-download
closes the read end, the producer hits ``BrokenPipeError`` and
stops — no orphaned thread keeps taring. A producer failure
(missing directory, unreadable file) is re-raised here once the
stream drains, so the response aborts mid-transfer instead of
completing 200 with a silently truncated archive.
"""
home = getenv('HOME') or ''
read_fd, write_fd = os.pipe()
produce_error: list[BaseException] = []
def produce() -> None:
try:
with os.fdopen(write_fd, 'wb') as write_file:
with tarfile.open(
fileobj=write_file,
mode='w|gz',
compresslevel=BACKUP_COMPRESSLEVEL,
) as tar:
for directory in directories:
tar.add(path.join(home, directory), arcname=directory)
except BrokenPipeError:
logging.info('backup download cancelled by the client')
except Exception as exc:
logging.exception('backup stream failed')
produce_error.append(exc)
producer = threading.Thread(
target=produce, name='backup-stream', daemon=True
)
producer.start()
drained = False
try:
with os.fdopen(read_fd, 'rb') as read_file:
while chunk := read_file.read(_STREAM_CHUNK_BYTES):
yield chunk
drained = True
finally:
producer.join(timeout=5)
# Only surface producer failures on the drained path: a
# GeneratorExit (client went away) shouldn't morph into a
# spurious error.
if drained and produce_error:
raise produce_error[0]
def create_backup(name: str = default_archive_name) -> str:
home = getenv('HOME') or ''
archive_name = backup_archive_name(name)
file_path = path.join(home, static_dir, archive_name)
if not path.exists(path.join(home, static_dir)):
@@ -68,7 +148,9 @@ def create_backup(name: str = default_archive_name) -> str:
remove(file_path)
try:
with tarfile.open(file_path, 'w:gz') as tar:
with tarfile.open(
file_path, 'w:gz', compresslevel=BACKUP_COMPRESSLEVEL
) as tar:
for directory in directories:
path_to_dir = path.join(home, directory)
tar.add(path_to_dir, arcname=directory)

View File

@@ -2,6 +2,7 @@ import os
import shutil
import tarfile
import tempfile
import threading
from collections.abc import Iterator
from datetime import datetime
from os import path
@@ -10,9 +11,11 @@ from unittest import mock
import pytest
from anthias_server.lib.backup_helper import (
backup_archive_name,
create_backup,
recover,
static_dir,
stream_backup,
)
@@ -59,6 +62,67 @@ def test_recover(backup_home: str) -> None:
assert not path.isfile(file_path)
def test_backup_archive_name_falls_back_on_empty_name() -> None:
dt = datetime(2016, 7, 19, 12, 42, 12)
with mock.patch(
'anthias_server.lib.backup_helper.datetime'
) as mock_datetime:
mock_datetime.now.return_value = dt
assert (
backup_archive_name('')
== 'anthias-backup-2016-07-19T12-42-12.tar.gz'
)
assert (
backup_archive_name('lobby') == 'lobby-2016-07-19T12-42-12.tar.gz'
)
def test_stream_backup_round_trips_through_recover(
backup_home: str,
) -> None:
# The settings page download streams the archive as it is built
# (issue #2987: the staged-file path produced no response bytes
# for minutes and browsers gave up). The streamed bytes must be a
# well-formed tar.gz that recover() accepts unchanged.
marker = path.join(backup_home, '.anthias', 'anthias.conf')
with open(marker, 'w') as f:
f.write('[viewer]\n')
chunks = list(stream_backup())
assert chunks
os.makedirs(path.join(backup_home, static_dir), exist_ok=True)
file_path = path.join(backup_home, static_dir, 'streamed.tar.gz')
with open(file_path, 'wb') as f:
f.write(b''.join(chunks))
with tarfile.open(file_path, 'r:gz') as tar:
names = tar.getnames()
assert '.anthias' in names
assert 'anthias_assets' in names
assert '.anthias/anthias.conf' in names
os.remove(marker)
recover(file_path)
assert path.isfile(marker)
def test_stream_backup_stops_when_consumer_disconnects(
backup_home: str,
) -> None:
# A closed browser connection must not leave the producer thread
# taring forever — the generator's pipe close propagates as
# BrokenPipeError and the thread exits.
stream = stream_backup()
assert next(stream)
stream.close() # GeneratorExit → read end closed
main_thread = threading.main_thread()
for thread in threading.enumerate():
if thread.name == 'backup-stream' and thread is not main_thread:
thread.join(timeout=5)
assert not thread.is_alive()
@pytest.fixture
def legacy_home() -> Iterator[str]:
"""Backups produced by pre-rename releases used `.screenly` and