From da53e045a6801b5056fb448e68e55da7bf106913 Mon Sep 17 00:00:00 2001 From: Viktor Petersson Date: Sun, 7 Jun 2026 08:00:28 +0200 Subject: [PATCH] fix(server): stream the backup download so large libraries don't time out (#3005) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(server): stream the backup download so large libraries don't time out Part of issue #2987 ("Get backup" reportedly never completes on a Pi 3B+): the settings download built the whole tar.gz on disk before sending the first byte. tar+gzip at the default level 9 measures 98 s for 355 MB on a Pi 4 (~3.6 MB/s) — a multi-GB library on a Pi 3 sends nothing for longer than a browser keeps a byte-less request alive, so the download always aborted. - settings_backup now streams the archive while it is built (StreamingHttpResponse over a pipe fed by a tarfile producer thread): first bytes hit the wire immediately, nothing is staged on the SD card, and a client disconnect stops the producer. - gzip level drops to 1 for both the streamed and the API (create_backup) paths — backups are mostly already-compressed media, so level 9 burned minutes of CPU for ~no size win. - The Content-Disposition filename is RFC-8187-escaped via Django's content_disposition_header (player_name is operator-controlled). Co-Authored-By: Claude Opus 4.8 (1M context) * fix(server): surface backup-stream producer failures, fix mypy type - stream_backup() re-raises a producer failure once the pipe drains, so the response aborts mid-transfer instead of completing 200 with a silently truncated archive (review feedback). Client disconnects still log-and-stop without morphing into spurious errors. - Return type is Generator (not Iterator) — the disconnect test calls .close(), which mypy rejects on Iterator. Co-Authored-By: Claude Opus 4.8 (1M context) --------- Co-authored-by: Claude Opus 4.8 (1M context) --- src/anthias_server/app/views.py | 51 ++++++++------ src/anthias_server/lib/backup_helper.py | 90 +++++++++++++++++++++++-- tests/test_backup_helper.py | 64 ++++++++++++++++++ 3 files changed, 182 insertions(+), 23 deletions(-) diff --git a/src/anthias_server/app/views.py b/src/anthias_server/app/views.py index eafed092..b0af9230 100644 --- a/src/anthias_server/app/views.py +++ b/src/anthias_server/app/views.py @@ -10,12 +10,20 @@ from urllib.parse import urlparse, urlunparse from django.contrib import messages from django.contrib.auth import authenticate from django.contrib.auth import login as django_login -from django.http import FileResponse, HttpRequest, HttpResponse +from django.http import ( + FileResponse, + HttpRequest, + HttpResponse, + StreamingHttpResponse, +) from django.http.response import HttpResponseBase from django.shortcuts import redirect from django.urls import reverse from django.utils import timezone -from django.utils.http import url_has_allowed_host_and_scheme +from django.utils.http import ( + content_disposition_header, + url_has_allowed_host_and_scheme, +) from django.views.decorators.http import require_http_methods from anthias_server.app import page_context @@ -966,26 +974,31 @@ def settings_save(request: HttpRequest) -> HttpResponse: @authorized @require_http_methods(['POST']) def settings_backup(request: HttpRequest) -> HttpResponseBase: - """Same as api.views.mixins.BackupViewMixin.post but streams the - archive back inline instead of returning the filename + relying - on a follow-up /static_with_mime/ fetch.""" - from os import getenv + """Stream the backup archive as it is being built. - filename = backup_helper.create_backup(name=settings['player_name']) - # backup_helper.create_backup writes to $HOME/anthias/staticfiles/. - # Reach for the same base here so we serve the archive from the - # path it was actually written to. The pre-fix path.join('static', - # filename) was relative to CWD and would FileNotFoundError in - # production where uvicorn runs out of /usr/src/app. - archive_path = path.join( - getenv('HOME') or '', 'anthias/staticfiles', filename - ) - response = FileResponse( - open(archive_path, 'rb'), - as_attachment=True, - filename=filename, + The pre-#2987 shape — create_backup() to a staging file, then a + FileResponse — sent zero response bytes while tar+gzip ran, and + that build takes minutes on an SBC with a real content library. + Browsers abort a request that has produced no bytes for ~5 + minutes, so on devices like the reporter's Pi 3 "Get backup" + spun and then silently failed every time. stream_backup() puts + the first tar block on the wire immediately and keeps bytes + flowing for the whole build (and needs no staging space on the + SD card). The iterator is synchronous — Django adapts it under + ASGI via its thread executor, which is fine for an operation + this rare. + """ + filename = backup_helper.backup_archive_name(settings['player_name']) + response = StreamingHttpResponse( + backup_helper.stream_backup(), content_type='application/x-tgz', ) + # content_disposition_header() RFC-8187-escapes the filename — + # player_name is operator-controlled, so it must not be able to + # smuggle quotes/control chars into the header. + response['Content-Disposition'] = content_disposition_header( + as_attachment=True, filename=filename + ) return response diff --git a/src/anthias_server/lib/backup_helper.py b/src/anthias_server/lib/backup_helper.py index 3894cc68..49622312 100644 --- a/src/anthias_server/lib/backup_helper.py +++ b/src/anthias_server/lib/backup_helper.py @@ -2,6 +2,8 @@ import logging import os import sys import tarfile +import threading +from collections.abc import Generator from datetime import datetime from os import getenv, makedirs, path, remove from typing import Any @@ -53,12 +55,90 @@ class BackupRecoverError(Exception): """Raised when a backup archive cannot be safely recovered.""" -def create_backup(name: str = default_archive_name) -> str: - home = getenv('HOME') or '' - archive_name = '{}-{}.tar.gz'.format( +# gzip level for backup archives. The bulk of a backup is video/image +# assets that are already compressed, so the default level 9 burns +# minutes of single-core CPU on an SBC for ~no size win — measured +# 98 s for 355 MB on a Pi 4 (~3.6 MB/s); a multi-GB library on a Pi 3 +# runs well past the browser's request timeout (issue #2987, the +# "Get backup never downloads" report). Level 1 is ~4-5x faster and +# within a couple of percent on size for this content mix. +BACKUP_COMPRESSLEVEL = 1 + +# Chunk size for stream_backup(). 64 KiB matches the pipe capacity on +# Linux so the tar producer thread and the HTTP writer interleave +# without either side stalling on tiny reads. +_STREAM_CHUNK_BYTES = 64 * 1024 + + +def backup_archive_name(name: str = default_archive_name) -> str: + return '{}-{}.tar.gz'.format( name if name else default_archive_name, datetime.now().strftime('%Y-%m-%dT%H-%M-%S'), ) + + +def stream_backup() -> Generator[bytes, None, None]: + """Yield a backup tar.gz as it is being built. + + The download path used to write the whole archive to disk before + sending the first byte. tar+gzip of a multi-GB asset library takes + minutes on an SBC, and a browser kills a request that has produced + no response bytes for ~5 minutes — so "Get backup" simply never + completed on devices with a real content library (issue #2987). + Streaming starts the response immediately, keeps bytes flowing for + the whole build, and as a bonus never needs staging space on the + (often nearly full) SD card. + + A producer thread feeds ``tarfile`` through a pipe; the generator + reads the other end. A consumer that disconnects mid-download + closes the read end, the producer hits ``BrokenPipeError`` and + stops — no orphaned thread keeps taring. A producer failure + (missing directory, unreadable file) is re-raised here once the + stream drains, so the response aborts mid-transfer instead of + completing 200 with a silently truncated archive. + """ + home = getenv('HOME') or '' + read_fd, write_fd = os.pipe() + produce_error: list[BaseException] = [] + + def produce() -> None: + try: + with os.fdopen(write_fd, 'wb') as write_file: + with tarfile.open( + fileobj=write_file, + mode='w|gz', + compresslevel=BACKUP_COMPRESSLEVEL, + ) as tar: + for directory in directories: + tar.add(path.join(home, directory), arcname=directory) + except BrokenPipeError: + logging.info('backup download cancelled by the client') + except Exception as exc: + logging.exception('backup stream failed') + produce_error.append(exc) + + producer = threading.Thread( + target=produce, name='backup-stream', daemon=True + ) + producer.start() + drained = False + try: + with os.fdopen(read_fd, 'rb') as read_file: + while chunk := read_file.read(_STREAM_CHUNK_BYTES): + yield chunk + drained = True + finally: + producer.join(timeout=5) + # Only surface producer failures on the drained path: a + # GeneratorExit (client went away) shouldn't morph into a + # spurious error. + if drained and produce_error: + raise produce_error[0] + + +def create_backup(name: str = default_archive_name) -> str: + home = getenv('HOME') or '' + archive_name = backup_archive_name(name) file_path = path.join(home, static_dir, archive_name) if not path.exists(path.join(home, static_dir)): @@ -68,7 +148,9 @@ def create_backup(name: str = default_archive_name) -> str: remove(file_path) try: - with tarfile.open(file_path, 'w:gz') as tar: + with tarfile.open( + file_path, 'w:gz', compresslevel=BACKUP_COMPRESSLEVEL + ) as tar: for directory in directories: path_to_dir = path.join(home, directory) tar.add(path_to_dir, arcname=directory) diff --git a/tests/test_backup_helper.py b/tests/test_backup_helper.py index 8a17802f..3f7f5ae7 100644 --- a/tests/test_backup_helper.py +++ b/tests/test_backup_helper.py @@ -2,6 +2,7 @@ import os import shutil import tarfile import tempfile +import threading from collections.abc import Iterator from datetime import datetime from os import path @@ -10,9 +11,11 @@ from unittest import mock import pytest from anthias_server.lib.backup_helper import ( + backup_archive_name, create_backup, recover, static_dir, + stream_backup, ) @@ -59,6 +62,67 @@ def test_recover(backup_home: str) -> None: assert not path.isfile(file_path) +def test_backup_archive_name_falls_back_on_empty_name() -> None: + dt = datetime(2016, 7, 19, 12, 42, 12) + with mock.patch( + 'anthias_server.lib.backup_helper.datetime' + ) as mock_datetime: + mock_datetime.now.return_value = dt + assert ( + backup_archive_name('') + == 'anthias-backup-2016-07-19T12-42-12.tar.gz' + ) + assert ( + backup_archive_name('lobby') == 'lobby-2016-07-19T12-42-12.tar.gz' + ) + + +def test_stream_backup_round_trips_through_recover( + backup_home: str, +) -> None: + # The settings page download streams the archive as it is built + # (issue #2987: the staged-file path produced no response bytes + # for minutes and browsers gave up). The streamed bytes must be a + # well-formed tar.gz that recover() accepts unchanged. + marker = path.join(backup_home, '.anthias', 'anthias.conf') + with open(marker, 'w') as f: + f.write('[viewer]\n') + + chunks = list(stream_backup()) + assert chunks + + os.makedirs(path.join(backup_home, static_dir), exist_ok=True) + file_path = path.join(backup_home, static_dir, 'streamed.tar.gz') + with open(file_path, 'wb') as f: + f.write(b''.join(chunks)) + + with tarfile.open(file_path, 'r:gz') as tar: + names = tar.getnames() + assert '.anthias' in names + assert 'anthias_assets' in names + assert '.anthias/anthias.conf' in names + + os.remove(marker) + recover(file_path) + assert path.isfile(marker) + + +def test_stream_backup_stops_when_consumer_disconnects( + backup_home: str, +) -> None: + # A closed browser connection must not leave the producer thread + # taring forever — the generator's pipe close propagates as + # BrokenPipeError and the thread exits. + stream = stream_backup() + assert next(stream) + stream.close() # GeneratorExit → read end closed + main_thread = threading.main_thread() + for thread in threading.enumerate(): + if thread.name == 'backup-stream' and thread is not main_thread: + thread.join(timeout=5) + assert not thread.is_alive() + + @pytest.fixture def legacy_home() -> Iterator[str]: """Backups produced by pre-rename releases used `.screenly` and