mirror of
https://github.com/Screenly/Anthias.git
synced 2026-06-10 09:08:09 -04:00
fix(processing): address Copilot review on commit 8602faff
Six items from Copilot's fresh review pass: * ``assets_upload`` last-resort image-extension allowlist now derives from ``processing.NORMALIZE_IMAGE_EXTS`` rather than duplicating the set. Adding a new normalisable format (or removing one) only touches one place. * ``_run_image_normalisation`` cleans up the ``.webp.tmp`` staging file on every failure path — Pillow's ``UnidentifiedImageError`` *and* a generic OSError mid-encode (disk pressure, libheif crash). Mirrors the video pipeline's _drop_staging contract. * ffmpeg failure messages decode the bytes ``stderr`` to UTF-8 text (with replacement on malformed bytes) and tail-trim long output, so ``metadata.error_message`` reads as a real diagnostic instead of ``b'Invalid data found'``. * Removed the dead ``path.normpath(staging) == path.normpath( src_uri)`` branch in the video transcode path. With the staging suffix in place the two paths can never collide; expanded the surrounding comment to explain why. * Updated ``normalize_video_asset``'s docstring to describe the per-board codec grid (libx264 on pi2/pi3, libx265 on pi4-64 / pi5 / x86) rather than the now-stale "transcode to H.264 MP4". * Fixed "truecate" → "truncate" typo in ``_asset_row.html`` comment. New tests: * ``test_image_partial_write_cleans_staging`` — half-writes the ``.webp.tmp`` then raises OSError; asserts the runner removes the partial file before propagating. * ``test_format_subprocess_stderr_decodes_and_trims`` — covers the bytes-decode, malformed-byte-replacement, tail-trim, and empty-stderr cases for the new helper. * ``test_video_ffmpeg_error_cleans_staging`` strengthened to assert the error message contains *no* ``b'...'`` Python repr prefix — it's now operator-readable text.
This commit is contained in:
@@ -46,7 +46,7 @@
|
||||
{% comment %} Processing failed and the celery task wrote
|
||||
metadata.error_message via _NormalizeAssetTask.on_failure. Show
|
||||
a small badge with a hover tooltip carrying the full message
|
||||
(truecate via the title attribute, no extra JS) so the operator
|
||||
(truncate via the title attribute, no extra JS) so the operator
|
||||
can see *why* the row never finalised, edit/delete it, or
|
||||
re-upload a fixed file. {% endcomment %}
|
||||
<span class="error-pill"
|
||||
|
||||
@@ -265,11 +265,14 @@ def assets_upload(request: HttpRequest) -> HttpResponse:
|
||||
if client_type.split('/')[0] in ('image', 'video'):
|
||||
file_type = client_type
|
||||
else:
|
||||
# Last resort: classify by extension. Mirrors the format
|
||||
# set the normalisation pipeline can handle so a typo'd
|
||||
# extension still falls through to the rejection branch
|
||||
# below rather than silently routing through the wrong
|
||||
# branch.
|
||||
# Last resort: classify by extension. The image set is the
|
||||
# union of "viewer-friendly direct uploads" (jpg/png/...
|
||||
# — never normalised) and ``NORMALIZE_IMAGE_EXTS`` (the
|
||||
# full set the upload pipeline knows how to convert). The
|
||||
# latter is a single source of truth, so adding a new
|
||||
# normalisable format only touches one place.
|
||||
from anthias_server.processing import NORMALIZE_IMAGE_EXTS
|
||||
|
||||
ext = path.splitext(upload_name)[1].lower()
|
||||
image_exts = {
|
||||
'.jpg',
|
||||
@@ -278,12 +281,7 @@ def assets_upload(request: HttpRequest) -> HttpResponse:
|
||||
'.gif',
|
||||
'.webp',
|
||||
'.svg',
|
||||
'.bmp',
|
||||
'.heic',
|
||||
'.heif',
|
||||
'.tif',
|
||||
'.tiff',
|
||||
}
|
||||
} | NORMALIZE_IMAGE_EXTS
|
||||
video_exts = {
|
||||
'.mp4',
|
||||
'.mov',
|
||||
|
||||
@@ -818,7 +818,14 @@ def normalize_image_asset(asset_id: str) -> None:
|
||||
max_retries=1,
|
||||
)
|
||||
def normalize_video_asset(asset_id: str) -> None:
|
||||
"""Probe the upload and either passthrough or transcode to H.264 MP4.
|
||||
"""Probe the upload; passthrough or transcode to a board-appropriate
|
||||
codec in MP4.
|
||||
|
||||
The output codec is decided by ``processing._resolve_board_profile``:
|
||||
libx264 on legacy Pi 2/Pi 3 (mmal-vc4 path; no HEVC hardware) and
|
||||
libx265 with the iOS-friendly ``-tag:v hvc1`` on Pi 4-64 / Pi 5 /
|
||||
x86 (mpv path; HEVC hardware-decoded on Pi 4 / x86, software on
|
||||
Pi 5). The on-device player only ever sees a codec it can decode.
|
||||
|
||||
ffmpeg is wrapped with ``-threads 2`` so two cores stay free for
|
||||
the on-device viewer; the celery worker itself runs under
|
||||
|
||||
@@ -332,6 +332,37 @@ def _ext(filename: str) -> str:
|
||||
return path.splitext(filename)[1].lower()
|
||||
|
||||
|
||||
# Operator-facing diagnostics surface via ``metadata.error_message``,
|
||||
# which renders as a hover tooltip on the row's "Failed" pill — bytes
|
||||
# repr (``b'...'``) reads as gibberish there. ``_STDERR_TAIL_BYTES``
|
||||
# trims ffmpeg's pre-amble (build info, configuration, library
|
||||
# versions) so the operator sees the actual error line, not 4 KB of
|
||||
# noise.
|
||||
_STDERR_TAIL_BYTES = 800
|
||||
|
||||
|
||||
def _format_subprocess_stderr(exc: sh.ErrorReturnCode) -> str:
|
||||
"""Decode the tail of a subprocess's stderr to a readable string.
|
||||
|
||||
``sh.ErrorReturnCode.stderr`` is ``bytes``; returning it via
|
||||
``f'{exc.stderr!r}'`` would surface ``b'...'`` to the operator.
|
||||
Decode as UTF-8 with replacement (so a malformed byte doesn't
|
||||
crash the error path), strip whitespace, keep only the tail —
|
||||
ffmpeg's diagnostic always lands at the end. ``replace`` rather
|
||||
than ``ignore`` so we never silently swallow a broken byte that
|
||||
might have been the only signal.
|
||||
"""
|
||||
raw = exc.stderr or b''
|
||||
if isinstance(raw, str):
|
||||
text = raw
|
||||
else:
|
||||
text = raw.decode('utf-8', errors='replace')
|
||||
text = text.strip()
|
||||
if len(text) > _STDERR_TAIL_BYTES:
|
||||
text = '…' + text[-_STDERR_TAIL_BYTES:]
|
||||
return text
|
||||
|
||||
|
||||
def _set_processing_error(asset_id: str, message: str) -> None:
|
||||
"""Persist a human-readable error and clear is_processing.
|
||||
|
||||
@@ -484,14 +515,33 @@ def _run_image_normalisation(asset: Asset) -> None:
|
||||
# already sweeps stale .tmp after 1h.
|
||||
staging = f'{final_uri}.tmp'
|
||||
|
||||
def _drop_image_staging() -> None:
|
||||
# Mirror the video-pipeline cleanup contract: every failure
|
||||
# path through ``_convert_image_to_webp`` removes the staging
|
||||
# ``.webp.tmp`` before propagating, so a partial Pillow write
|
||||
# (disk pressure, libheif crash mid-decode) never leaves
|
||||
# debris for the operator to trip over.
|
||||
try:
|
||||
os.remove(staging)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
try:
|
||||
_convert_image_to_webp(src_uri, staging)
|
||||
except UnidentifiedImageError as exc:
|
||||
# Pillow couldn't decode — almost always a corrupt upload.
|
||||
# Re-raise with a clearer name; on_failure formats the message.
|
||||
_drop_image_staging()
|
||||
raise UnidentifiedImageError(
|
||||
f'could not decode image {src_uri!r}: {exc}'
|
||||
) from exc
|
||||
except Exception:
|
||||
# Any other failure (OSError / disk pressure, libheif crash,
|
||||
# WebP encoder rejecting the mode) must also clean up before
|
||||
# bubbling. ``except Exception`` rather than ``BaseException``
|
||||
# so KeyboardInterrupt / SystemExit still abort cleanly.
|
||||
_drop_image_staging()
|
||||
raise
|
||||
|
||||
# Atomic rename within the same dir — POSIX guarantees this is
|
||||
# observed as a single inode swap. os.replace overwrites an
|
||||
@@ -764,13 +814,15 @@ def _run_video_normalisation(asset: Asset) -> None:
|
||||
# GCed by the orphan-file sweep.
|
||||
base_no_ext = path.splitext(src_uri)[0]
|
||||
final_uri = f'{base_no_ext}.mp4'
|
||||
# ``staging`` deliberately uses a ``.staging.mp4`` suffix rather
|
||||
# than ``.mp4.tmp``: ffmpeg picks its muxer from the output
|
||||
# extension, and ``.tmp`` makes it bail with "Unable to choose an
|
||||
# output format". The suffix also guarantees ``staging != src_uri``
|
||||
# for the in-place transcode case (a non-h264 ``.mp4`` whose
|
||||
# ``base_no_ext`` matches): ffmpeg keeps reading from src_uri while
|
||||
# writing to a distinct path. ``os.replace`` then atomically swaps
|
||||
# the input out for the transcoded output.
|
||||
staging = f'{base_no_ext}.staging.mp4'
|
||||
# Edge case: source already lives at ``<base>.mp4`` (a non-h264
|
||||
# .mp4 fell through here). The staging name must NOT collide
|
||||
# with the source on rename — using a distinct suffix keeps the
|
||||
# input safe to read while ffmpeg writes to the staging file.
|
||||
if path.normpath(staging) == path.normpath(src_uri):
|
||||
staging = f'{base_no_ext}.staging.transcoded.mp4'
|
||||
|
||||
def _drop_staging() -> None:
|
||||
# All transcode failure paths converge through this helper so
|
||||
@@ -792,8 +844,13 @@ def _run_video_normalisation(asset: Asset) -> None:
|
||||
raise RuntimeError(f'ffmpeg timed out for {src_uri!r}: {exc}') from exc
|
||||
except sh.ErrorReturnCode as exc:
|
||||
_drop_staging()
|
||||
# ``exc.stderr`` is bytes; ``!r`` would render it as
|
||||
# ``b'...'`` in the operator-facing metadata.error_message.
|
||||
# Decode + trim the tail for readability — ffmpeg's last few
|
||||
# lines of stderr are usually the diagnostic, the rest is
|
||||
# build-info noise.
|
||||
raise RuntimeError(
|
||||
f'ffmpeg failed for {src_uri!r}: {exc.stderr!r}'
|
||||
f'ffmpeg failed for {src_uri!r}: {_format_subprocess_stderr(exc)}'
|
||||
) from exc
|
||||
|
||||
if not path.isfile(staging) or os.stat(staging).st_size == 0:
|
||||
|
||||
@@ -331,11 +331,45 @@ def test_image_corrupt_input_raises_clean_error(asset_dir: str) -> None:
|
||||
processing._run_image_normalisation(asset)
|
||||
|
||||
# No webp produced; the source file is left in place for the
|
||||
# operator to inspect / re-upload.
|
||||
# operator to inspect / re-upload. Staging .webp.tmp must also
|
||||
# be gone — same contract as the video pipeline's _drop_staging.
|
||||
assert not path.exists(path.join(asset_dir, 'broken.webp'))
|
||||
assert not path.exists(path.join(asset_dir, 'broken.webp.tmp'))
|
||||
assert path.exists(src)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_image_partial_write_cleans_staging(asset_dir: str) -> None:
|
||||
"""If Pillow writes some bytes to the staging file and *then*
|
||||
raises mid-encode (disk pressure, codec crash), the runner must
|
||||
clean up the partial .webp.tmp before propagating. Mocking
|
||||
``_convert_image_to_webp`` to half-write + raise is the cheapest
|
||||
way to exercise that path deterministically — a real OSError
|
||||
mid-WebP-encode is hard to provoke from userspace."""
|
||||
src = path.join(asset_dir, 'fixture.tiff')
|
||||
_write_image(src, 'TIFF')
|
||||
asset = _make_processing_asset('img-partial', src)
|
||||
|
||||
def half_write(_in: str, staging: str) -> None:
|
||||
with open(staging, 'wb') as fh:
|
||||
fh.write(b'partial WebP bytes')
|
||||
raise OSError('disk full mid-encode')
|
||||
|
||||
with (
|
||||
mock.patch.object(processing, '_notify'),
|
||||
mock.patch.object(
|
||||
processing, '_convert_image_to_webp', side_effect=half_write
|
||||
),
|
||||
):
|
||||
with pytest.raises(OSError, match='disk full'):
|
||||
processing._run_image_normalisation(asset)
|
||||
|
||||
# No leftover .webp.tmp in the asset dir — the runner removed it
|
||||
# before the raise propagated.
|
||||
leftover = [n for n in os.listdir(asset_dir) if n.endswith('.webp.tmp')]
|
||||
assert not leftover, f'image staging leftover: {leftover}'
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_image_missing_file_raises_filenotfound(asset_dir: str) -> None:
|
||||
"""Source file disappeared between row creation and task
|
||||
@@ -739,7 +773,65 @@ def test_video_ffmpeg_error_cleans_staging(asset_dir: str) -> None:
|
||||
with pytest.raises(RuntimeError) as excinfo:
|
||||
processing._run_video_normalisation(asset)
|
||||
|
||||
assert 'Invalid data found' in str(excinfo.value)
|
||||
msg = str(excinfo.value)
|
||||
assert 'Invalid data found' in msg
|
||||
# The error message goes straight into metadata.error_message
|
||||
# which renders on the operator-facing "Failed" pill — must NOT
|
||||
# contain a Python bytes repr (``b'...'``) wrapper.
|
||||
assert "b'Invalid" not in msg, (
|
||||
'stderr should be decoded for operator display'
|
||||
)
|
||||
|
||||
|
||||
def test_format_subprocess_stderr_decodes_and_trims() -> None:
|
||||
"""``_format_subprocess_stderr`` must produce operator-readable
|
||||
text: bytes decoded as UTF-8 (with replacement for malformed
|
||||
bytes), no ``b'...'`` wrapper, very long output trimmed to its
|
||||
tail (where ffmpeg's actual diagnostic lives)."""
|
||||
# 1) Plain bytes decode cleanly.
|
||||
exc = sh.ErrorReturnCode_1(
|
||||
full_cmd='ffmpeg ...',
|
||||
stdout=b'',
|
||||
stderr=b'Invalid data found in input\n',
|
||||
truncate=False,
|
||||
)
|
||||
out = processing._format_subprocess_stderr(exc)
|
||||
assert out == 'Invalid data found in input'
|
||||
assert "b'" not in out
|
||||
|
||||
# 2) Malformed UTF-8 doesn't crash — replacement char is fine.
|
||||
exc = sh.ErrorReturnCode_1(
|
||||
full_cmd='ffmpeg ...',
|
||||
stdout=b'',
|
||||
stderr=b'broken\xff byte',
|
||||
truncate=False,
|
||||
)
|
||||
out = processing._format_subprocess_stderr(exc)
|
||||
assert 'broken' in out and 'byte' in out
|
||||
|
||||
# 3) Long stderr is tail-trimmed with an ellipsis prefix so the
|
||||
# operator sees the diagnostic, not 4 KB of build-info preamble.
|
||||
long_tail = 'final-error-line'
|
||||
big = b'x' * 2000 + long_tail.encode()
|
||||
exc = sh.ErrorReturnCode_1(
|
||||
full_cmd='ffmpeg ...',
|
||||
stdout=b'',
|
||||
stderr=big,
|
||||
truncate=False,
|
||||
)
|
||||
out = processing._format_subprocess_stderr(exc)
|
||||
assert out.startswith('…')
|
||||
assert long_tail in out
|
||||
assert len(out) <= processing._STDERR_TAIL_BYTES + 1
|
||||
|
||||
# 4) Empty stderr returns the empty string, not "b''".
|
||||
exc = sh.ErrorReturnCode_1(
|
||||
full_cmd='ffmpeg ...',
|
||||
stdout=b'',
|
||||
stderr=b'',
|
||||
truncate=False,
|
||||
)
|
||||
assert processing._format_subprocess_stderr(exc) == ''
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
|
||||
Reference in New Issue
Block a user