mirror of
https://github.com/Screenly/Anthias.git
synced 2026-06-10 09:08:09 -04:00
fix(server): stream the backup download so large libraries don't time out (#3005)
* fix(server): stream the backup download so large libraries don't time out Part of issue #2987 ("Get backup" reportedly never completes on a Pi 3B+): the settings download built the whole tar.gz on disk before sending the first byte. tar+gzip at the default level 9 measures 98 s for 355 MB on a Pi 4 (~3.6 MB/s) — a multi-GB library on a Pi 3 sends nothing for longer than a browser keeps a byte-less request alive, so the download always aborted. - settings_backup now streams the archive while it is built (StreamingHttpResponse over a pipe fed by a tarfile producer thread): first bytes hit the wire immediately, nothing is staged on the SD card, and a client disconnect stops the producer. - gzip level drops to 1 for both the streamed and the API (create_backup) paths — backups are mostly already-compressed media, so level 9 burned minutes of CPU for ~no size win. - The Content-Disposition filename is RFC-8187-escaped via Django's content_disposition_header (player_name is operator-controlled). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(server): surface backup-stream producer failures, fix mypy type - stream_backup() re-raises a producer failure once the pipe drains, so the response aborts mid-transfer instead of completing 200 with a silently truncated archive (review feedback). Client disconnects still log-and-stop without morphing into spurious errors. - Return type is Generator (not Iterator) — the disconnect test calls .close(), which mypy rejects on Iterator. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -10,12 +10,20 @@ from urllib.parse import urlparse, urlunparse
|
||||
from django.contrib import messages
|
||||
from django.contrib.auth import authenticate
|
||||
from django.contrib.auth import login as django_login
|
||||
from django.http import FileResponse, HttpRequest, HttpResponse
|
||||
from django.http import (
|
||||
FileResponse,
|
||||
HttpRequest,
|
||||
HttpResponse,
|
||||
StreamingHttpResponse,
|
||||
)
|
||||
from django.http.response import HttpResponseBase
|
||||
from django.shortcuts import redirect
|
||||
from django.urls import reverse
|
||||
from django.utils import timezone
|
||||
from django.utils.http import url_has_allowed_host_and_scheme
|
||||
from django.utils.http import (
|
||||
content_disposition_header,
|
||||
url_has_allowed_host_and_scheme,
|
||||
)
|
||||
from django.views.decorators.http import require_http_methods
|
||||
|
||||
from anthias_server.app import page_context
|
||||
@@ -966,26 +974,31 @@ def settings_save(request: HttpRequest) -> HttpResponse:
|
||||
@authorized
|
||||
@require_http_methods(['POST'])
|
||||
def settings_backup(request: HttpRequest) -> HttpResponseBase:
|
||||
"""Same as api.views.mixins.BackupViewMixin.post but streams the
|
||||
archive back inline instead of returning the filename + relying
|
||||
on a follow-up /static_with_mime/ fetch."""
|
||||
from os import getenv
|
||||
"""Stream the backup archive as it is being built.
|
||||
|
||||
filename = backup_helper.create_backup(name=settings['player_name'])
|
||||
# backup_helper.create_backup writes to $HOME/anthias/staticfiles/.
|
||||
# Reach for the same base here so we serve the archive from the
|
||||
# path it was actually written to. The pre-fix path.join('static',
|
||||
# filename) was relative to CWD and would FileNotFoundError in
|
||||
# production where uvicorn runs out of /usr/src/app.
|
||||
archive_path = path.join(
|
||||
getenv('HOME') or '', 'anthias/staticfiles', filename
|
||||
)
|
||||
response = FileResponse(
|
||||
open(archive_path, 'rb'),
|
||||
as_attachment=True,
|
||||
filename=filename,
|
||||
The pre-#2987 shape — create_backup() to a staging file, then a
|
||||
FileResponse — sent zero response bytes while tar+gzip ran, and
|
||||
that build takes minutes on an SBC with a real content library.
|
||||
Browsers abort a request that has produced no bytes for ~5
|
||||
minutes, so on devices like the reporter's Pi 3 "Get backup"
|
||||
spun and then silently failed every time. stream_backup() puts
|
||||
the first tar block on the wire immediately and keeps bytes
|
||||
flowing for the whole build (and needs no staging space on the
|
||||
SD card). The iterator is synchronous — Django adapts it under
|
||||
ASGI via its thread executor, which is fine for an operation
|
||||
this rare.
|
||||
"""
|
||||
filename = backup_helper.backup_archive_name(settings['player_name'])
|
||||
response = StreamingHttpResponse(
|
||||
backup_helper.stream_backup(),
|
||||
content_type='application/x-tgz',
|
||||
)
|
||||
# content_disposition_header() RFC-8187-escapes the filename —
|
||||
# player_name is operator-controlled, so it must not be able to
|
||||
# smuggle quotes/control chars into the header.
|
||||
response['Content-Disposition'] = content_disposition_header(
|
||||
as_attachment=True, filename=filename
|
||||
)
|
||||
return response
|
||||
|
||||
|
||||
|
||||
@@ -2,6 +2,8 @@ import logging
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
import threading
|
||||
from collections.abc import Generator
|
||||
from datetime import datetime
|
||||
from os import getenv, makedirs, path, remove
|
||||
from typing import Any
|
||||
@@ -53,12 +55,90 @@ class BackupRecoverError(Exception):
|
||||
"""Raised when a backup archive cannot be safely recovered."""
|
||||
|
||||
|
||||
def create_backup(name: str = default_archive_name) -> str:
|
||||
home = getenv('HOME') or ''
|
||||
archive_name = '{}-{}.tar.gz'.format(
|
||||
# gzip level for backup archives. The bulk of a backup is video/image
|
||||
# assets that are already compressed, so the default level 9 burns
|
||||
# minutes of single-core CPU on an SBC for ~no size win — measured
|
||||
# 98 s for 355 MB on a Pi 4 (~3.6 MB/s); a multi-GB library on a Pi 3
|
||||
# runs well past the browser's request timeout (issue #2987, the
|
||||
# "Get backup never downloads" report). Level 1 is ~4-5x faster and
|
||||
# within a couple of percent on size for this content mix.
|
||||
BACKUP_COMPRESSLEVEL = 1
|
||||
|
||||
# Chunk size for stream_backup(). 64 KiB matches the pipe capacity on
|
||||
# Linux so the tar producer thread and the HTTP writer interleave
|
||||
# without either side stalling on tiny reads.
|
||||
_STREAM_CHUNK_BYTES = 64 * 1024
|
||||
|
||||
|
||||
def backup_archive_name(name: str = default_archive_name) -> str:
|
||||
return '{}-{}.tar.gz'.format(
|
||||
name if name else default_archive_name,
|
||||
datetime.now().strftime('%Y-%m-%dT%H-%M-%S'),
|
||||
)
|
||||
|
||||
|
||||
def stream_backup() -> Generator[bytes, None, None]:
|
||||
"""Yield a backup tar.gz as it is being built.
|
||||
|
||||
The download path used to write the whole archive to disk before
|
||||
sending the first byte. tar+gzip of a multi-GB asset library takes
|
||||
minutes on an SBC, and a browser kills a request that has produced
|
||||
no response bytes for ~5 minutes — so "Get backup" simply never
|
||||
completed on devices with a real content library (issue #2987).
|
||||
Streaming starts the response immediately, keeps bytes flowing for
|
||||
the whole build, and as a bonus never needs staging space on the
|
||||
(often nearly full) SD card.
|
||||
|
||||
A producer thread feeds ``tarfile`` through a pipe; the generator
|
||||
reads the other end. A consumer that disconnects mid-download
|
||||
closes the read end, the producer hits ``BrokenPipeError`` and
|
||||
stops — no orphaned thread keeps taring. A producer failure
|
||||
(missing directory, unreadable file) is re-raised here once the
|
||||
stream drains, so the response aborts mid-transfer instead of
|
||||
completing 200 with a silently truncated archive.
|
||||
"""
|
||||
home = getenv('HOME') or ''
|
||||
read_fd, write_fd = os.pipe()
|
||||
produce_error: list[BaseException] = []
|
||||
|
||||
def produce() -> None:
|
||||
try:
|
||||
with os.fdopen(write_fd, 'wb') as write_file:
|
||||
with tarfile.open(
|
||||
fileobj=write_file,
|
||||
mode='w|gz',
|
||||
compresslevel=BACKUP_COMPRESSLEVEL,
|
||||
) as tar:
|
||||
for directory in directories:
|
||||
tar.add(path.join(home, directory), arcname=directory)
|
||||
except BrokenPipeError:
|
||||
logging.info('backup download cancelled by the client')
|
||||
except Exception as exc:
|
||||
logging.exception('backup stream failed')
|
||||
produce_error.append(exc)
|
||||
|
||||
producer = threading.Thread(
|
||||
target=produce, name='backup-stream', daemon=True
|
||||
)
|
||||
producer.start()
|
||||
drained = False
|
||||
try:
|
||||
with os.fdopen(read_fd, 'rb') as read_file:
|
||||
while chunk := read_file.read(_STREAM_CHUNK_BYTES):
|
||||
yield chunk
|
||||
drained = True
|
||||
finally:
|
||||
producer.join(timeout=5)
|
||||
# Only surface producer failures on the drained path: a
|
||||
# GeneratorExit (client went away) shouldn't morph into a
|
||||
# spurious error.
|
||||
if drained and produce_error:
|
||||
raise produce_error[0]
|
||||
|
||||
|
||||
def create_backup(name: str = default_archive_name) -> str:
|
||||
home = getenv('HOME') or ''
|
||||
archive_name = backup_archive_name(name)
|
||||
file_path = path.join(home, static_dir, archive_name)
|
||||
|
||||
if not path.exists(path.join(home, static_dir)):
|
||||
@@ -68,7 +148,9 @@ def create_backup(name: str = default_archive_name) -> str:
|
||||
remove(file_path)
|
||||
|
||||
try:
|
||||
with tarfile.open(file_path, 'w:gz') as tar:
|
||||
with tarfile.open(
|
||||
file_path, 'w:gz', compresslevel=BACKUP_COMPRESSLEVEL
|
||||
) as tar:
|
||||
for directory in directories:
|
||||
path_to_dir = path.join(home, directory)
|
||||
tar.add(path_to_dir, arcname=directory)
|
||||
|
||||
@@ -2,6 +2,7 @@ import os
|
||||
import shutil
|
||||
import tarfile
|
||||
import tempfile
|
||||
import threading
|
||||
from collections.abc import Iterator
|
||||
from datetime import datetime
|
||||
from os import path
|
||||
@@ -10,9 +11,11 @@ from unittest import mock
|
||||
import pytest
|
||||
|
||||
from anthias_server.lib.backup_helper import (
|
||||
backup_archive_name,
|
||||
create_backup,
|
||||
recover,
|
||||
static_dir,
|
||||
stream_backup,
|
||||
)
|
||||
|
||||
|
||||
@@ -59,6 +62,67 @@ def test_recover(backup_home: str) -> None:
|
||||
assert not path.isfile(file_path)
|
||||
|
||||
|
||||
def test_backup_archive_name_falls_back_on_empty_name() -> None:
|
||||
dt = datetime(2016, 7, 19, 12, 42, 12)
|
||||
with mock.patch(
|
||||
'anthias_server.lib.backup_helper.datetime'
|
||||
) as mock_datetime:
|
||||
mock_datetime.now.return_value = dt
|
||||
assert (
|
||||
backup_archive_name('')
|
||||
== 'anthias-backup-2016-07-19T12-42-12.tar.gz'
|
||||
)
|
||||
assert (
|
||||
backup_archive_name('lobby') == 'lobby-2016-07-19T12-42-12.tar.gz'
|
||||
)
|
||||
|
||||
|
||||
def test_stream_backup_round_trips_through_recover(
|
||||
backup_home: str,
|
||||
) -> None:
|
||||
# The settings page download streams the archive as it is built
|
||||
# (issue #2987: the staged-file path produced no response bytes
|
||||
# for minutes and browsers gave up). The streamed bytes must be a
|
||||
# well-formed tar.gz that recover() accepts unchanged.
|
||||
marker = path.join(backup_home, '.anthias', 'anthias.conf')
|
||||
with open(marker, 'w') as f:
|
||||
f.write('[viewer]\n')
|
||||
|
||||
chunks = list(stream_backup())
|
||||
assert chunks
|
||||
|
||||
os.makedirs(path.join(backup_home, static_dir), exist_ok=True)
|
||||
file_path = path.join(backup_home, static_dir, 'streamed.tar.gz')
|
||||
with open(file_path, 'wb') as f:
|
||||
f.write(b''.join(chunks))
|
||||
|
||||
with tarfile.open(file_path, 'r:gz') as tar:
|
||||
names = tar.getnames()
|
||||
assert '.anthias' in names
|
||||
assert 'anthias_assets' in names
|
||||
assert '.anthias/anthias.conf' in names
|
||||
|
||||
os.remove(marker)
|
||||
recover(file_path)
|
||||
assert path.isfile(marker)
|
||||
|
||||
|
||||
def test_stream_backup_stops_when_consumer_disconnects(
|
||||
backup_home: str,
|
||||
) -> None:
|
||||
# A closed browser connection must not leave the producer thread
|
||||
# taring forever — the generator's pipe close propagates as
|
||||
# BrokenPipeError and the thread exits.
|
||||
stream = stream_backup()
|
||||
assert next(stream)
|
||||
stream.close() # GeneratorExit → read end closed
|
||||
main_thread = threading.main_thread()
|
||||
for thread in threading.enumerate():
|
||||
if thread.name == 'backup-stream' and thread is not main_thread:
|
||||
thread.join(timeout=5)
|
||||
assert not thread.is_alive()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def legacy_home() -> Iterator[str]:
|
||||
"""Backups produced by pre-rename releases used `.screenly` and
|
||||
|
||||
Reference in New Issue
Block a user