diff --git a/.github/workflows/test-runner.yml b/.github/workflows/test-runner.yml index 297aae9b..695c5248 100644 --- a/.github/workflows/test-runner.yml +++ b/.github/workflows/test-runner.yml @@ -125,14 +125,18 @@ jobs: env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - # Browser-test failure artifacts (Chrome screenshot + rendered - # HTML + console log) are written by tests/conftest.py's - # pytest_runtest_makereport hook into ./test-artifacts/. Same + # Browser-test failure artifacts (Playwright trace.zip + + # full-page screenshot per failed test) are written by + # pytest-playwright via the + # `--tracing retain-on-failure --screenshot only-on-failure + # --output test-artifacts` addopts in pyproject.toml. Same # bind-mount story as coverage.xml: anything inside the test # container under /usr/src/app shows up directly on the runner. - # `if: failure()` keeps green runs from uploading an empty bundle; - # `if-no-files-found: ignore` is a belt for the case where a - # failure is in setup/teardown and never reaches the hook. + # `if: failure()` keeps green runs from uploading an empty + # bundle; `if-no-files-found: ignore` is a belt for the case + # where the failure happens before any test reaches the + # tracing-stop call. `playwright show-trace trace.zip` replays + # the test interactively when downloading the bundle locally. - name: Upload integration test artifacts if: failure() && inputs.test-type == 'python' uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 diff --git a/src/anthias_server/app/views.py b/src/anthias_server/app/views.py index 5ddd8b68..197df6c2 100644 --- a/src/anthias_server/app/views.py +++ b/src/anthias_server/app/views.py @@ -280,12 +280,14 @@ def assets_update(request: HttpRequest, asset_id: str) -> HttpResponse: # row marked as 'webpage', etc.). The edit modal renders mimetype # read-only for the same reason. if asset.mimetype == 'video': - # Video assets must keep duration=0 — the API enforces this on - # writes and the playlist scheduler reads the real length back - # from the file at playtime. The edit form disables the input - # for videos, but defend on the server too so a hand-crafted - # POST can't desync the DB from the API contract. - asset.duration = 0 + # Video duration is owned by the probe_video_duration Celery + # task, not the edit form. Leave the persisted value alone so + # an edit doesn't clobber the probed length back to 0. The + # edit form already disables the duration input for videos + # (`:disabled="editAsset.mimetype === 'video'"`); this branch + # is the matching server-side guard against hand-crafted POSTs + # that try to write a duration anyway. + pass else: asset.duration = int( request.POST.get('duration') or asset.duration or 0 diff --git a/src/anthias_server/celery_tasks.py b/src/anthias_server/celery_tasks.py index 58b1afe7..65b6c3e1 100755 --- a/src/anthias_server/celery_tasks.py +++ b/src/anthias_server/celery_tasks.py @@ -8,7 +8,7 @@ from typing import Any import django import sh -from celery import Celery +from celery import Celery, Task from tenacity import Retrying, stop_after_attempt, wait_fixed django.setup() @@ -205,7 +205,47 @@ def cleanup() -> None: logging.warning('cleanup: could not remove %s: %s', entry.path, e) +class _ProbeVideoTask(Task): # type: ignore[type-arg] + """Custom Task subclass so ``on_failure`` can clear + ``is_processing`` when retries are exhausted. + + Without this, a row whose probe permanently fails (e.g. ffprobe + missing on a stripped-down image, or 3 consecutive ffprobe + timeouts) stays at "Processing" forever and the operator has no + way to interact with it. Celery calls ``on_failure`` after + ``max_retries`` is exhausted *or* on any non-autoretry exception + that escapes the task body. + """ + + def on_failure( + self, + exc: BaseException, + task_id: str, + args: tuple[Any, ...], + kwargs: dict[str, Any], + einfo: Any, + ) -> None: + asset_id = args[0] if args else kwargs.get('asset_id') + if not asset_id: + return + try: + Asset.objects.filter(asset_id=asset_id).update(is_processing=False) + # Same WS nudge the success path sends so the row drops + # the "Processing" pill without waiting for the 5s table + # poll. Operator then has the row in its terminal state + # and can edit/delete it. + from anthias_server.app.consumers import notify_asset_update + + notify_asset_update(asset_id) + except Exception: + logging.exception( + 'probe_video_duration on_failure cleanup failed for %s', + asset_id, + ) + + @celery.task( + base=_ProbeVideoTask, time_limit=120, autoretry_for=(sh.TimeoutException, sh.ErrorReturnCode, OSError), retry_backoff=10, @@ -233,6 +273,9 @@ def probe_video_duration(asset_id: str) -> None: is_processing=False so the operator can adjust manually. - Any other exception is swallowed (logged) so a programmer error in the helper doesn't hammer the worker via retries. + - Retries exhausted (autoretry_for raise after max_retries) → + _ProbeVideoTask.on_failure clears is_processing so the row + doesn't stay stuck at "Processing" indefinitely. """ try: asset = Asset.objects.get(asset_id=asset_id)