Files
Anthias/tests/test_remote_video.py
Viktor Petersson 70a57bb857 feat(server,api): auto-download remote video URLs into the asset pipeline (#2912)
* feat(server,api): auto-download remote video URLs into the asset pipeline

- Detect http(s) single-file video URLs in CreateAssetSerializerMixin
  (ext-first, HEAD-probe fallback) and rewrite the row to a local
  destination + is_processing=True (v1.2 / v2 only).
- New download_remote_video_asset Celery task streams via requests
  with a 5 GiB cap and chains into normalize_video_asset for the
  per-board HW-codec gate.
- Live streams (RTSP / RTMP / HLS / DASH / SmoothStreaming) stay as
  literal URIs the viewer plays directly.
- Failures land as metadata.error_message + is_processing=False via a
  copy-paste of the YouTube download task's on_failure contract.

Closes #2894

* refactor(remote_video): apply review fixes

- Route HEAD probe + streaming GET through ``AnthiasSession`` so
  origins see the project-wide ``Anthias/<release>`` UA (#2897).
- Drop the ``urllib3`` logger level side effect at import time.
- Trust the serializer-stamped ``Asset.uri`` exclusively; refuse the
  task on an empty uri rather than guessing the extension (which
  could diverge from the HEAD-probed choice).

* test(remote_video): type-annotate destination-path tests for mypy

CI's mypy step rejected ``test_remote_video_destination_path_*`` —
``tmp_path`` was unannotated and the literal ``{'assetdir': str}``
arg failed against ``AnthiasSettings | None``. Cast through
``AnthiasSettings`` (a ``UserDict[str, Any]``) so mypy is happy
without spinning up the real config layer that needs ``HOME`` set.

* refactor(remote_video): address SonarCloud findings

- Extract ``_stream_remote_video_to_file`` +
  ``_validate_remote_video_response`` helpers so
  ``download_remote_video_asset`` lands under SonarCloud's cognitive-
  complexity ceiling.
- Drop redundant ``requests.RequestException`` from the except clause
  — it's a subclass of ``OSError`` so ``except OSError`` already
  covers it (S5713).
- Drop the redundant ``startswith(('http://', 'https://'))`` in the
  serializer; ``is_downloadable_remote_video`` already rejects every
  non-http(s) scheme. Removes the literal ``http://`` hotspot in
  mixins.py.
- Replace ``udp://239.0.0.1:1234`` with ``udp://stream.example.test:1234``
  in the test fixture (S1313 hardcoded IP).
- Annotate the deliberate ``http://`` test case with ``# NOSONAR``
  and a comment explaining the LAN-without-TLS use case.

* refactor: collapse duplicate blocks flagged by SonarCloud

- Extract ``_DownloadAssetTask`` base for the YouTube and remote-
  video download tasks. Subclasses override ``_failure_log_prefix``
  only — the metadata-error / notify body lives in one place.
- Merge ``test_create_remote_hls_manifest_stays_as_stream_url`` and
  ``test_create_rtsp_url_stays_as_stream_url`` into a single
  parametrized test that asserts both shapes through the same path.

Brings new-code duplication below SonarCloud's 3% gate.
2026-05-19 14:23:54 +02:00

279 lines
10 KiB
Python

"""Unit tests for ``anthias_common.remote_video``.
The classifier sits in the synchronous POST /assets path, so behaviour
under each input shape (known extension, manifest, stream scheme,
extensionless URL with various HEAD responses, network failure) is
covered explicitly to lock the create-asset contract.
These tests do not need Django — the helpers are framework-free — but
they live under ``tests/`` so the existing pytest harness picks them
up alongside the celery/processing suites.
"""
from __future__ import annotations
from pathlib import Path
from typing import cast
from unittest import mock
import pytest
import requests
from anthias_common.remote_video import (
is_downloadable_remote_video,
remote_video_destination_path,
)
from anthias_server.settings import AnthiasSettings
# ---------------------------------------------------------------------------
# Extension-based classify (no HEAD call)
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
    'uri,expected_ext',
    [
        ('https://example.com/clip.mp4', '.mp4'),
        ('https://cdn.example.com/path/to/file.webm', '.webm'),
        ('https://example.com/movie.MOV', '.mov'),
        # http (not https) is intentional — the classifier must accept
        # both schemes so operators on internal LANs (where TLS isn't
        # set up for the media server) can still auto-download.
        ('http://example.com/x.mkv', '.mkv'),  # NOSONAR
        ('https://example.com/a.avi', '.avi'),
        ('https://example.com/short.m4v', '.m4v'),
        ('https://example.com/old.ogv', '.ogv'),
        # Query strings and fragments do not fool the extension match.
        ('https://example.com/clip.mp4?download=true', '.mp4'),
        ('https://example.com/clip.mp4#t=10', '.mp4'),
    ],
)
def test_classify_known_video_extension_returns_download(
    uri: str, expected_ext: str
) -> None:
    """Extension fast path: a URL whose path ends in a known
    single-file video container must classify as downloadable and
    report the matching lower-cased local extension, without the
    HEAD probe ever being issued."""
    probe_target = 'anthias_common.remote_video._session.head'
    with mock.patch(probe_target) as head_probe:
        verdict = is_downloadable_remote_video(uri)

    downloadable, extension = verdict
    assert downloadable is True
    assert extension == expected_ext
    # The extension alone must decide; any HEAD call is a regression.
    head_probe.assert_not_called()
@pytest.mark.parametrize(
    'uri',
    [
        'https://example.com/stream.m3u8',
        'https://example.com/dash/manifest.mpd',
        'https://example.com/legacy.m3u',
        'https://example.com/smooth/Manifest.ism',
    ],
)
def test_classify_streaming_manifest_extensions_return_stream(
    uri: str,
) -> None:
    """Manifest extensions (HLS / DASH / SmoothStreaming) describe a
    stream rather than a single file, so they must get the negative
    classify with an empty extension. The extension match is expected
    to short-circuit, hence no HEAD call."""
    probe_target = 'anthias_common.remote_video._session.head'
    with mock.patch(probe_target) as head_probe:
        verdict = is_downloadable_remote_video(uri)

    downloadable, extension = verdict
    assert downloadable is False
    assert extension == ''
    head_probe.assert_not_called()
@pytest.mark.parametrize(
    'uri',
    [
        'rtsp://camera.local/feed',
        'rtmp://media.example.com/live',
        'srt://stream.example.com:9000',
        'udp://stream.example.test:1234',
        'mms://media.example.com/live',
    ],
)
def test_classify_streaming_schemes_return_stream(uri: str) -> None:
    """URIs on a streaming scheme (RTSP / RTMP / SRT / UDP / MMS) are
    streams by construction: the classifier must answer
    ``(False, '')`` and must not issue a HEAD probe for them."""
    probe_target = 'anthias_common.remote_video._session.head'
    with mock.patch(probe_target) as head_probe:
        verdict = is_downloadable_remote_video(uri)

    downloadable, extension = verdict
    assert downloadable is False
    assert extension == ''
    head_probe.assert_not_called()
def test_classify_streaming_scheme_with_mp4_path_returns_stream() -> None:
    """The scheme outranks the path extension: ``rtsp://camera/feed.mp4``
    is still RTSP, so a trailing ``.mp4`` must not promote it to an
    http(s)-style download, and no HEAD probe may fire."""
    rtsp_uri = 'rtsp://camera/feed.mp4'
    with mock.patch(
        'anthias_common.remote_video._session.head'
    ) as head_probe:
        downloadable, extension = is_downloadable_remote_video(rtsp_uri)

    assert downloadable is False
    assert extension == ''
    head_probe.assert_not_called()
def test_classify_non_http_scheme_returns_stream() -> None:
    """Schemes that are neither http(s) nor a streaming protocol
    (``file://``, ``ftp://``, ...) must receive the negative classify.
    A ``file://`` URI with a video extension is used here, and the
    HEAD probe must stay untouched."""
    local_file_uri = 'file:///tmp/clip.mp4'
    with mock.patch(
        'anthias_common.remote_video._session.head'
    ) as head_probe:
        downloadable, extension = is_downloadable_remote_video(
            local_file_uri
        )

    assert downloadable is False
    assert extension == ''
    head_probe.assert_not_called()
def test_classify_empty_uri_returns_stream() -> None:
    """An empty URI gets the negative classify with an empty
    extension.

    The HEAD probe is patched — as in the other non-http(s) tests —
    so a regression in the classifier can never reach the real
    network from this test, and the probe is asserted unused because
    an empty string carries no http(s) scheme to probe."""
    with mock.patch('anthias_common.remote_video._session.head') as head:
        ok, ext = is_downloadable_remote_video('')
    assert ok is False
    assert ext == ''
    head.assert_not_called()
# ---------------------------------------------------------------------------
# HEAD-probe fallback (extensionless / unknown-extension URLs)
# ---------------------------------------------------------------------------
def _fake_head(content_type: str, status_code: int = 200) -> mock.MagicMock:
"""Shape a fake ``requests.head`` response with a given
Content-Type and status code."""
resp = mock.MagicMock()
resp.status_code = status_code
resp.headers = {'Content-Type': content_type}
return resp
def test_classify_bare_url_falls_back_to_head_probe_video() -> None:
    """An extensionless URL whose HEAD response advertises
    ``Content-Type: video/mp4`` must classify as downloadable with
    the inferred ``.mp4`` extension, using exactly one HEAD call."""
    bare_url = 'https://api.example.com/video/12345'
    video_response = _fake_head('video/mp4')
    with mock.patch(
        'anthias_common.remote_video._session.head',
        return_value=video_response,
    ) as head_probe:
        downloadable, extension = is_downloadable_remote_video(bare_url)

    assert downloadable is True
    assert extension == '.mp4'
    head_probe.assert_called_once()
def test_classify_head_probe_html_returns_stream() -> None:
    """When the HEAD probe answers ``Content-Type: text/html`` (for
    example an error or landing page), the URL must stay a literal
    stream URL. Downloading it would store the HTML page as the
    asset instead of a video."""
    bare_url = 'https://api.example.com/video/12345'
    html_response = _fake_head('text/html; charset=utf-8')
    with mock.patch(
        'anthias_common.remote_video._session.head',
        return_value=html_response,
    ):
        downloadable, extension = is_downloadable_remote_video(bare_url)

    assert downloadable is False
    assert extension == ''
def test_classify_head_probe_manifest_content_type_returns_stream() -> None:
    """An HLS origin may serve ``application/vnd.apple.mpegurl`` from
    a URL with no ``.m3u8`` suffix. The HEAD probe must reject that
    content type: fetching the manifest as a single file would store
    the playlist, not the segments it points at."""
    manifest_url = 'https://hls.example.com/stream'
    manifest_response = _fake_head('application/vnd.apple.mpegurl')
    with mock.patch(
        'anthias_common.remote_video._session.head',
        return_value=manifest_response,
    ):
        downloadable, extension = is_downloadable_remote_video(
            manifest_url
        )

    assert downloadable is False
    assert extension == ''
def test_classify_head_probe_http_error_returns_stream() -> None:
    """A non-success HEAD status (here 405 Method Not Allowed, which
    some origins return for HEAD) must downgrade the URL to stream
    mode even though the Content-Type looks like video, so the
    create call does not fail on it."""
    bare_url = 'https://api.example.com/video/12345'
    rejected_response = _fake_head('video/mp4', status_code=405)
    with mock.patch(
        'anthias_common.remote_video._session.head',
        return_value=rejected_response,
    ):
        downloadable, extension = is_downloadable_remote_video(bare_url)

    assert downloadable is False
    assert extension == ''
@pytest.mark.parametrize(
    'exc',
    [
        requests.exceptions.Timeout('slow origin'),
        requests.exceptions.ConnectionError('refused'),
        requests.exceptions.TooManyRedirects('loop'),
        requests.exceptions.SSLError('bad cert'),
    ],
)
def test_classify_head_probe_network_failure_returns_stream(
    exc: Exception,
) -> None:
    """The HEAD probe is best-effort: whichever network exception it
    raises, the classifier must swallow it and keep the URL as a
    stream, so a flaky origin never blocks the create call."""
    bare_url = 'https://api.example.com/video/12345'
    with mock.patch(
        'anthias_common.remote_video._session.head',
        side_effect=exc,
    ):
        downloadable, extension = is_downloadable_remote_video(bare_url)

    assert downloadable is False
    assert extension == ''
def test_classify_head_probe_uses_short_timeout() -> None:
    """Pin the keyword arguments of the synchronous HEAD probe: a 5s
    timeout (operators are blocked on POST /assets while it runs)
    and redirect following enabled."""
    bare_url = 'https://api.example.com/video/12345'
    with mock.patch(
        'anthias_common.remote_video._session.head',
        return_value=_fake_head('video/mp4'),
    ) as head_probe:
        is_downloadable_remote_video(bare_url)

    # ``call_args`` unpacks to (positional args, keyword args); only
    # the keyword arguments matter here.
    _positional, probe_kwargs = head_probe.call_args
    assert probe_kwargs['timeout'] == 5
    assert probe_kwargs['allow_redirects'] is True
# ---------------------------------------------------------------------------
# Destination path
# ---------------------------------------------------------------------------
def test_remote_video_destination_path_uses_assetdir(tmp_path: Path) -> None:
    """The destination must be rooted at ``settings['assetdir']`` so
    that cleanup() treats the downloaded file as referenced rather
    than sweeping it as an orphan."""
    # The real ``AnthiasSettings`` constructor reads
    # ``~/.anthias/anthias.conf``. Only the ``assetdir`` key is
    # needed here, so a plain dict is cast to the type: mypy is
    # satisfied and the full config layer is never spun up.
    settings_stub = cast(AnthiasSettings, {'assetdir': str(tmp_path)})
    expected_path = f'{tmp_path}/abc123.mp4'

    actual_path = remote_video_destination_path(
        'abc123', '.mp4', settings_stub
    )

    assert actual_path == expected_path
def test_remote_video_destination_path_preserves_extension() -> None:
    """Whatever extension the caller supplies must come back verbatim
    in the destination path, so webm/mkv/avi files land with their
    real container suffix and ffprobe identifies them correctly."""
    settings_stub = cast(AnthiasSettings, {'assetdir': '/data'})
    for suffix in ('.mp4', '.webm', '.mkv', '.mov'):
        destination = remote_video_destination_path(
            'asset-1', suffix, settings_stub
        )
        assert destination == f'/data/asset-1{suffix}'