mirror of
https://github.com/Screenly/Anthias.git
synced 2026-06-10 09:08:09 -04:00
fix(processing): exclude UnidentifiedImageError from autoretry
Pillow's UnidentifiedImageError inherits from OSError, so it was getting caught by normalize_image_asset's autoretry_for=(OSError,) filter — a corrupt-image upload would retry up to 3 times with exponential backoff before metadata.error_message landed. Add it to dont_autoretry_for alongside FileNotFoundError so the permanent-failure contract surfaces immediately. Test extended to assert both exclusions are in place at celery-config time.
This commit is contained in:
@@ -9,6 +9,7 @@ from typing import Any
|
||||
import django
|
||||
import sh
|
||||
from celery import Celery, Task
|
||||
from PIL import UnidentifiedImageError
|
||||
from tenacity import Retrying, stop_after_attempt, wait_fixed
|
||||
|
||||
django.setup()
|
||||
@@ -785,32 +786,38 @@ NORMALIZE_VIDEO_TIME_LIMIT_S = processing.NORMALIZE_VIDEO_TIME_LIMIT_S
|
||||
base=processing._NormalizeAssetTask,
|
||||
time_limit=300,
|
||||
autoretry_for=(OSError,),
|
||||
# ``FileNotFoundError`` is-a ``OSError`` so the autoretry_for
|
||||
# filter would catch it — but a missing source file isn't a
|
||||
# transient condition; retrying just keeps the row in
|
||||
# ``is_processing=True`` longer before the inevitable
|
||||
# ``on_failure`` lands. Excluding it here lets the on_failure
|
||||
# path write ``metadata.error_message`` immediately.
|
||||
dont_autoretry_for=(FileNotFoundError,),
|
||||
# Two OSError subclasses are permanent and must NOT trigger a
|
||||
# retry — they'd just keep the row in ``is_processing=True``
|
||||
# longer before the inevitable on_failure lands:
|
||||
# * ``FileNotFoundError`` — source file gone between row
|
||||
# creation and pickup (cleanup raced operator, disk pressure).
|
||||
# * ``UnidentifiedImageError`` — Pillow decoded the header far
|
||||
# enough to refuse the file (corrupt HEIC, truncated TIFF,
|
||||
# mis-typed extension). It inherits from OSError so the
|
||||
# autoretry_for filter would otherwise sweep it up; listing
|
||||
# it explicitly here makes the failure surface immediately.
|
||||
dont_autoretry_for=(FileNotFoundError, UnidentifiedImageError),
|
||||
retry_backoff=10,
|
||||
retry_backoff_max=300,
|
||||
retry_jitter=True,
|
||||
max_retries=2,
|
||||
)
|
||||
def normalize_image_asset(asset_id: str) -> None:
|
||||
"""Convert HEIC / HEIF / TIFF uploads to lossless WebP.
|
||||
"""Convert every extension in
|
||||
``processing.NORMALIZE_IMAGE_EXTS`` (HEIC / HEIF / TIFF / BMP /
|
||||
ICO / TGA / JPEG 2000 family / AVIF) to lossless WebP.
|
||||
|
||||
No-ops if the row is missing or has already been finalised
|
||||
(duplicate task fire / operator-edited row). Permanent decode
|
||||
failures (corrupt HEIC) raise so ``_NormalizeAssetTask.on_failure``
|
||||
writes ``metadata.error_message`` and clears ``is_processing``.
|
||||
|
||||
Retries on OSError covers transient disk pressure / a temporary
|
||||
libheif read hiccup; Pillow's UnidentifiedImageError is permanent
|
||||
and lands directly on on_failure. ``FileNotFoundError`` (source
|
||||
file gone between row creation and pickup) is excluded from
|
||||
autoretry — see the ``dont_autoretry_for`` rationale on the
|
||||
decorator.
|
||||
Retries on OSError cover transient disk pressure / a temporary
|
||||
libheif read hiccup. Two OSError subclasses are excluded from
|
||||
autoretry via ``dont_autoretry_for`` because they're permanent:
|
||||
``FileNotFoundError`` (source file gone) and Pillow's
|
||||
``UnidentifiedImageError`` (corrupt input). See the decorator's
|
||||
inline comment for the rationale.
|
||||
"""
|
||||
asset = processing._row_or_none(asset_id)
|
||||
if asset is None:
|
||||
|
||||
@@ -1412,15 +1412,21 @@ def test_normalize_image_asset_celery_no_op_when_row_missing() -> None:
|
||||
run.assert_not_called()
|
||||
|
||||
|
||||
def test_normalize_tasks_exclude_filenotfounderror_from_autoretry() -> None:
|
||||
def test_normalize_tasks_exclude_permanent_oserrors_from_autoretry() -> None:
|
||||
"""Both normalisation tasks have ``autoretry_for=(OSError,)`` so a
|
||||
transient disk hiccup is retried automatically. ``FileNotFoundError``
|
||||
is-a ``OSError`` so the autoretry filter would otherwise catch it
|
||||
too — but a missing source file is permanent, and retrying just
|
||||
delays the on_failure that writes ``metadata.error_message``.
|
||||
Confirm both tasks were registered with
|
||||
``dont_autoretry_for=(FileNotFoundError,)`` so the exclusion is
|
||||
in effect at celery-config time.
|
||||
transient disk hiccup is retried automatically. Several OSError
|
||||
subclasses are permanent and must be excluded so they land on
|
||||
on_failure immediately:
|
||||
|
||||
* ``FileNotFoundError`` — source file disappeared between row
|
||||
creation and pickup.
|
||||
* ``UnidentifiedImageError`` (image task only) — Pillow refused
|
||||
to decode the file. Inherits from OSError, so without the
|
||||
explicit exclusion the autoretry filter would sweep it up.
|
||||
|
||||
Confirms the exclusions are in effect at celery-config time so a
|
||||
future change to the decorators can't silently regress the
|
||||
immediate-fail contract.
|
||||
"""
|
||||
from anthias_server.celery_tasks import (
|
||||
normalize_image_asset,
|
||||
@@ -1444,6 +1450,16 @@ def test_normalize_tasks_exclude_filenotfounderror_from_autoretry() -> None:
|
||||
f'FileNotFoundError so missing-source raises immediately'
|
||||
)
|
||||
|
||||
# Image task additionally excludes UnidentifiedImageError (Pillow
|
||||
# only — video has no Pillow path).
|
||||
assert UnidentifiedImageError in tuple(
|
||||
getattr(normalize_image_asset, 'dont_autoretry_for', ())
|
||||
), (
|
||||
'normalize_image_asset expected dont_autoretry_for to include '
|
||||
'UnidentifiedImageError so corrupt-image raises immediately '
|
||||
'(it inherits from OSError)'
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_normalize_on_failure_writes_error_metadata(
|
||||
|
||||
Reference in New Issue
Block a user