From 5f2a4e976f9556799fbecdcbaf07ddf163350b5c Mon Sep 17 00:00:00 2001 From: derrod Date: Sun, 29 Dec 2024 17:20:19 +0100 Subject: [PATCH] obs-outputs: Add support for Hybrid MOV --- plugins/obs-outputs/data/locale/en-US.ini | 1 + plugins/obs-outputs/mp4-mux-internal.h | 92 ++++- plugins/obs-outputs/mp4-mux.c | 428 +++++++++++++++++----- plugins/obs-outputs/mp4-mux.h | 10 +- plugins/obs-outputs/mp4-output.c | 56 ++- plugins/obs-outputs/obs-outputs.c | 2 + 6 files changed, 485 insertions(+), 104 deletions(-) diff --git a/plugins/obs-outputs/data/locale/en-US.ini b/plugins/obs-outputs/data/locale/en-US.ini index 5eded159f..411a63105 100644 --- a/plugins/obs-outputs/data/locale/en-US.ini +++ b/plugins/obs-outputs/data/locale/en-US.ini @@ -11,6 +11,7 @@ MP4Output="MP4 File Output" MP4Output.FilePath="File Path" MP4Output.StartChapter="Start" MP4Output.UnnamedChapter="Unnamed" +MOVOutput="MOV File Output" IPFamily="IP Address Family" IPFamily.Both="IPv4 and IPv6 (Default)" diff --git a/plugins/obs-outputs/mp4-mux-internal.h b/plugins/obs-outputs/mp4-mux-internal.h index 85393f2c6..4136927ae 100644 --- a/plugins/obs-outputs/mp4-mux-internal.h +++ b/plugins/obs-outputs/mp4-mux-internal.h @@ -23,13 +23,6 @@ #include #include -/* Flavour for target compatibility */ -enum mp4_flavour { - MP4, /* ISO/IEC 14496-12 */ - MOV, /* Apple QuickTime */ - CMAF, /* ISO/IEC 23000-19 */ -}; - enum mp4_track_type { TRACK_UNKNOWN, TRACK_VIDEO, @@ -44,6 +37,7 @@ enum mp4_codec { CODEC_H264, CODEC_HEVC, CODEC_AV1, + CODEC_PRORES, /* Audio Codecs */ CODEC_AAC, @@ -97,7 +91,7 @@ struct mp4_track { /* Time Base (1/FPS for video, 1/sample rate for audio) */ uint32_t timebase_num; uint32_t timebase_den; - /* Output timescale calculated from time base (Video only) */ + /* Output timescale calculated from time base */ uint32_t timescale; /* First PTS this track has seen (in track timescale) */ @@ -133,7 +127,7 @@ struct mp4_mux { struct serializer *serializer; /* Target format compatibility */ - 
enum mp4_flavour mode; + enum mp4_flavor flavor; /* Flags */ enum mp4_mux_flags flags; @@ -340,3 +334,83 @@ static const char CHAPTER_PKT_FOOTER[12] = { 0x00, 0x00, 0x01, 0x00 }; /* clang-format on */ + +/** QTFF/MOV specifics **/ + +/* https://developer.apple.com/documentation/quicktime-file-format/sound_sample_description_version_2#LPCM-flag-values */ +enum lpcm_flags { + kAudioFormatFlagIsFloat = (1 << 0), + kAudioFormatFlagIsSignedInteger = (1 << 2), + kAudioFormatFlagIsPacked = (1 << 3), + kLinearPCMFormatFlagIsFloat = kAudioFormatFlagIsFloat, + kLinearPCMFormatFlagIsSignedInteger = kAudioFormatFlagIsSignedInteger, + kLinearPCMFormatFlagIsPacked = kAudioFormatFlagIsPacked, +}; + +static inline uint32_t get_lpcm_flags(enum mp4_codec codec) +{ + if (codec == CODEC_PCM_F32) + return kLinearPCMFormatFlagIsFloat | kLinearPCMFormatFlagIsPacked; + if (codec == CODEC_PCM_I16 || codec == CODEC_PCM_I24) + return kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked; + + return 0; +} + +enum channel_map_bits { + FL = 1 << 0, + FR = 1 << 1, + FC = 1 << 2, + LFE = 1 << 3, + RL = 1 << 4, + RR = 1 << 5, + RC = 1 << 8, + SL = 1 << 9, + SR = 1 << 10, +}; + +static uint32_t get_mov_channel_bitmap(enum speaker_layout layout) +{ + switch (layout) { + case SPEAKERS_MONO: + return FC; + case SPEAKERS_STEREO: + return FL | FR; + case SPEAKERS_2POINT1: + return FL | FR | LFE; + case SPEAKERS_4POINT0: + return FL | FR | FC | RC; + case SPEAKERS_4POINT1: + return FL | FR | FC | LFE | RC; + case SPEAKERS_5POINT1: + return FL | FR | FC | LFE | RL | RR; + case SPEAKERS_7POINT1: + return FL | FR | FC | LFE | RL | RR | SL | SR; + case SPEAKERS_UNKNOWN: + break; + } + + return 0; +} + +enum coreaudio_layout { + kAudioChannelLayoutTag_UseChannelBitmap = (1 << 16) | 0, + kAudioChannelLayoutTag_Mono = (100 << 16) | 1, + kAudioChannelLayoutTag_Stereo = (101 << 16) | 2, + kAudioChannelLayoutTag_DVD_4 = (133 << 16) | 3, // 2.1 (AAC Only) +}; + +static enum coreaudio_layout 
get_mov_channel_layout(enum mp4_codec codec, enum speaker_layout layout) +{ + switch (layout) { + case SPEAKERS_MONO: + return kAudioChannelLayoutTag_Mono; + case SPEAKERS_STEREO: + return kAudioChannelLayoutTag_Stereo; + case SPEAKERS_2POINT1: + /* Only supported for AAC. */ + return codec == CODEC_AAC ? kAudioChannelLayoutTag_DVD_4 : kAudioChannelLayoutTag_UseChannelBitmap; + default: + return kAudioChannelLayoutTag_UseChannelBitmap; + } +} diff --git a/plugins/obs-outputs/mp4-mux.c b/plugins/obs-outputs/mp4-mux.c index eb9390516..daac3e73d 100644 --- a/plugins/obs-outputs/mp4-mux.c +++ b/plugins/obs-outputs/mp4-mux.c @@ -37,8 +37,9 @@ * Standard identifier is included if not referring to ISO/IEC 14496-12. */ -#define do_log(level, format, ...) \ - blog(level, "[mp4 muxer: '%s'] " format, obs_output_get_name(mux->output), ##__VA_ARGS__) +#define do_log(level, format, ...) \ + blog(level, "[%s muxer: '%s'] " format, mux->flavor == FLAVOR_MOV ? "mov" : "mp4", \ + obs_output_get_name(mux->output), ##__VA_ARGS__) #define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__) #define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__) @@ -86,44 +87,49 @@ static size_t mp4_write_ftyp(struct mp4_mux *mux, bool fragmented) write_box(s, 0, "ftyp"); - const char *major_brand = "isom"; - /* Following FFmpeg's example, when using negative CTS the major brand - * needs to be either iso4 or iso6 depending on whether the file is - * currently fragmented. */ - if (mux->flags & MP4_USE_NEGATIVE_CTS) - major_brand = fragmented ? "iso6" : "iso4"; + if (mux->flavor == FLAVOR_MOV) { + /* For MOV, the brand is just "qt" followed by two spaces. 
*/ + s_write(s, "qt  ", 4); // major brand + s_wb32(s, 0x20140200); // minor version (BCD YYYYMM00 per QTFF spec) + s_write(s, "qt  ", 4); // minor brand + } else { + const char *major_brand = "isom"; + /* Following FFmpeg's example, when using negative CTS the major brand + * needs to be either iso4 or iso6 depending on whether the file is + * currently fragmented. */ + if (mux->flags & MP4_USE_NEGATIVE_CTS) + major_brand = fragmented ? "iso6" : "iso4"; - s_write(s, major_brand, 4); // major brand - s_wb32(s, 512); // minor version + s_write(s, major_brand, 4); // major brand + s_wb32(s, 0); // minor version + s_write(s, major_brand, 4); // minor brands (first one matches major brand) - // minor brands (first one matches major brand) - s_write(s, major_brand, 4); + /* Write isom base brand if it's not the major brand */ + if (strcmp(major_brand, "isom") != 0) + s_write(s, "isom", 4); - /* Write isom base brand if it's not the major brand */ - if (strcmp(major_brand, "isom") != 0) - s_write(s, "isom", 4); + /* Avoid adding newer brand (iso6) unless necessary, use "obs1" brand + * as a placeholder to maintain ftyp box size. */ + if (fragmented && strcmp(major_brand, "iso6") != 0) + s_write(s, "iso6", 4); + else + s_write(s, "obs1", 4); - /* Avoid adding newer brand (iso6) unless necessary, use "obs1" brand - * as a placeholder to maintain ftyp box size. 
*/ - if (fragmented && strcmp(major_brand, "iso6") != 0) - s_write(s, "iso6", 4); - else - s_write(s, "obs1", 4); + s_write(s, "iso2", 4); - s_write(s, "iso2", 4); - - /* Include H.264 brand if used */ - for (size_t i = 0; i < mux->tracks.num; i++) { - struct mp4_track *track = &mux->tracks.array[i]; - if (track->type == TRACK_VIDEO) { - if (track->codec == CODEC_H264) - s_write(s, "avc1", 4); - break; + /* Include H.264 brand if used */ + for (size_t i = 0; i < mux->tracks.num; i++) { + struct mp4_track *track = &mux->tracks.array[i]; + if (track->type == TRACK_VIDEO) { + if (track->codec == CODEC_H264) + s_write(s, "avc1", 4); + break; + } } - } - /* General MP4 brannd */ - s_write(s, "mp41", 4); + /* General MP4 brand */ + s_write(s, "mp41", 4); + } return write_box_size(s, start); } @@ -136,7 +142,7 @@ static size_t mp4_write_free(struct mp4_mux *mux) /* Write a 16-byte free box, so it can be replaced with a 64-bit size * box header (u32 + char[4] + u64) */ s_wb32(s, 16); - s_write(s, "free", 4); + s_write(s, mux->flavor == FLAVOR_MOV ? "wide" : "free", 4); s_wb64(s, 0); return 16; @@ -269,6 +275,11 @@ static size_t mp4_write_mdhd(struct mp4_mux *mux, struct mp4_track *track) /* use 64-bit duration if necessary */ if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) { + if (mux->flavor == FLAVOR_MOV) { + /* QTFF does not specify how to handle 32-bit overflow for duration/timestamps. */ + warn("Duration too large for MOV, this file may be unplayable in QuickTime!"); + } + size = 44; version = 1; } @@ -287,8 +298,8 @@ static size_t mp4_write_mdhd(struct mp4_mux *mux, struct mp4_track *track) s_wb32(s, (uint32_t)duration); // duration } - s_wb16(s, 21956); // language (undefined) - s_wb16(s, 0); // pre_defined + s_wb16(s, mux->flavor == FLAVOR_MOV ? 
32767 : 21956); // language (undefined) + s_wb16(s, 0); // pre_defined return size; } @@ -301,10 +312,15 @@ static size_t mp4_write_hdlr(struct mp4_mux *mux, struct mp4_track *track) write_fullbox(s, 0, "hdlr", 0, 0); - s_wb32(s, 0); // pre_defined + if (mux->flavor == FLAVOR_MOV) + s_write(s, track ? "mhlr" : "dhlr", 4); + else + s_wb32(s, 0); // pre_defined // handler_type - if (track->type == TRACK_VIDEO) + if (!track) + s_write(s, "url ", 4); + else if (track->type == TRACK_VIDEO) s_write(s, "vide", 4); else if (track->type == TRACK_CHAPTERS) s_write(s, "text", 4); @@ -315,13 +331,25 @@ static size_t mp4_write_hdlr(struct mp4_mux *mux, struct mp4_track *track) s_wb32(s, 0); // reserved s_wb32(s, 0); // reserved - // name (utf-8 string, null terminated) - if (track->type == TRACK_VIDEO) - s_write(s, "OBS Video Handler", 18); + const char *handler_name; + if (!track) + handler_name = "OBS Data Handler"; + else if (track->type == TRACK_VIDEO) + handler_name = "OBS Video Handler"; else if (track->type == TRACK_CHAPTERS) - s_write(s, "OBS Chapter Handler", 20); + handler_name = "OBS Chapter Handler"; else - s_write(s, "OBS Audio Handler", 18); + handler_name = "OBS Audio Handler"; + + // name (null-terminated for MP4, pascal string for MOV) + size_t handler_len = strlen(handler_name); + if (mux->flavor == FLAVOR_MOV) { + s_w8(s, (uint8_t)handler_len); + s_write(s, handler_name, handler_len); + } else { + s_write(s, handler_name, handler_len); + s_w8(s, 0); // NULL terminator + } return write_box_size(s, start); } @@ -538,9 +566,16 @@ static inline void mp4_write_visual_sample_entry(struct mp4_mux *mux, obs_encode // VisualSampleEntry Box s_wb16(s, 0); // pre_defined s_wb16(s, 0); // reserved - s_wb32(s, 0); // pre_defined - s_wb32(s, 0); // pre_defined - s_wb32(s, 0); // pre_defined + + if (mux->flavor == FLAVOR_MOV) { + s_write(s, "OBSS", 4); // vendor + s_wb32(s, 0x200); // temporal quality (codecNormalQuality = 512) + s_wb32(s, 0x200); // spatial quality 
(codecNormalQuality) + } else { + s_wb32(s, 0); // pre_defined + s_wb32(s, 0); // pre_defined + s_wb32(s, 0); // pre_defined + } s_wb16(s, (uint16_t)obs_encoder_get_width(enc)); // width s_wb16(s, (uint16_t)obs_encoder_get_height(enc)); // height @@ -705,6 +740,47 @@ static size_t mp4_write_av01(struct mp4_mux *mux, obs_encoder_t *enc) return write_box_size(s, start); } +/// (QTFF/Apple) Video Sample Description +static size_t mp4_write_prores(struct mp4_mux *mux, obs_encoder_t *enc) +{ + struct serializer *s = mux->serializer; + int64_t start = serializer_get_pos(s); + + /* We get the tag as an int, but need it as a char[4] */ + union tag { + char c[4]; + uint32_t i; + } codec_tag; + + /* Codec tag varies for ProRes depending on configuration, so we need to get it from the encoder. */ + obs_data_t *settings = obs_encoder_get_settings(enc); + codec_tag.i = (uint32_t)obs_data_get_int(settings, "codec_type"); + obs_data_release(settings); + +#if __BYTE_ORDER == __LITTLE_ENDIAN + codec_tag.i = ((codec_tag.i >> 24) & 0x000000FF) | ((codec_tag.i << 8) & 0x00FF0000) | + ((codec_tag.i >> 8) & 0x0000FF00) | ((codec_tag.i << 24) & 0xFF000000); +#endif + + write_box(s, 0, codec_tag.c); + + mp4_write_visual_sample_entry(mux, enc); + + // colr + mp4_write_colr(mux, enc); + + // clli + mp4_write_clli(mux, enc); + + // mdcv + mp4_write_mdcv(mux, enc); + + // pasp + mp4_write_pasp(mux); + + return write_box_size(s, start); +} + static inline void put_descr(struct serializer *s, uint8_t tag, size_t size) { int i = 3; @@ -776,6 +852,8 @@ static size_t mp4_write_esds(struct mp4_mux *mux, struct mp4_track *track) static inline void mp4_write_audio_sample_entry(struct mp4_mux *mux, struct mp4_track *track, uint8_t version) { struct serializer *s = mux->serializer; + bool is_mov = mux->flavor == FLAVOR_MOV; + bool is_pcm = track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24 || track->codec == CODEC_PCM_F32; // SampleEntry Box s_w8(s, 0); // reserved @@ -788,33 +866,69 @@ 
static inline void mp4_write_audio_sample_entry(struct mp4_mux *mux, struct mp4_ s_wb16(s, 1); // data_reference_index // AudioSampleEntry Box - if (version == 1) { - s_wb16(s, 1); // entry_version - s_wb16(s, 0); // reserved - s_wb16(s, 0); // reserved - s_wb16(s, 0); // reserved - } else { - s_wb32(s, 0); // reserved - s_wb32(s, 0); // reserved - } + s_wb16(s, version); // entry_version + s_wb16(s, 0); // reserved + s_wb16(s, 0); // reserved + s_wb16(s, 0); // reserved audio_t *audio = obs_encoder_audio(track->encoder); - size_t channels = audio_output_get_channels(audio); + uint32_t channels = (uint32_t)audio_output_get_channels(audio); uint32_t sample_rate = track->timescale; bool alac = track->codec == CODEC_ALAC; - s_wb16(s, (uint32_t)channels); // channelcount + /* MOV specific version: https://developer.apple.com/documentation/quicktime-file-format/sound_sample_description_version_2 */ + if (version == 2) { + // We need to get the raw float bytes, union seems to be the easiest way to do that. + union rate { + uint64_t u; + double f; + } rate; + rate.f = (double)sample_rate; - /* OBS FLAC is currently always 16 bit, ALAC always 24, this may change - * in the futrure and should be handled differently then. - * That being said thoes codecs are self-describing so in most cases it - * shouldn't matter either way. */ - s_wb16(s, alac ? 24 : 16); // samplesize + s_wb16(s, 3); // always3 + s_wb16(s, 16); // always16 + s_wb16(s, 0xfffe); // alwaysMinus2 + s_wb16(s, 0); // always0 + s_wb32(s, 0x00010000); // always65536 + s_wb32(s, 72); // sizeOfStructOnly (start of containing box to constLPCMFramesPerAudioPacket) + s_wb64(s, rate.u); // audioSampleRate + s_wb32(s, channels); // numAudioChannels + s_wb32(s, 0x7F000000); // always7F000000 + s_wb32(s, is_pcm ? track->sample_size / channels * 8 : 0); // constBitsPerChannel + s_wb32(s, get_lpcm_flags(track->codec)); // formatSpecificFlags + s_wb32(s, is_pcm ? 
track->sample_size : 0); // constBytesPerAudioPacket + s_wb32(s, is_pcm ? 1 : 0); // constLPCMFramesPerAudioPacket + } else { + s_wb16(s, channels); // channelcount - s_wb16(s, 0); // pre_defined - s_wb16(s, 0); // reserved + /* OBS FLAC is currently always 16-bit, ALAC always 24, this may change in the future and should be + * handled differently then. + * That being said those codecs are self-describing, so in most cases it shouldn't actually matter. */ + s_wb16(s, !is_mov && alac ? 24 : 16); // samplesize - s_wb32(s, sample_rate << 16); // samplerate + s_wb16(s, is_mov && !is_pcm ? -2 : 0); // pre_defined (compression ID in MOV) + s_wb16(s, 0); // reserved + + /* The sample rate field is limited to 16-bits. Technically version 1 supports a "srat" box which + * provides 32-bits, but this is not supported by most software (including FFmpeg and Chromium). + * For encoded codecs (AAC etc.), the sample rate can be read from the encoded data itself. + * For PCM FFmpeg will try to use the timescale as sample rate. */ + if (sample_rate > UINT16_MAX) { + warn("Sample rate too high for MP4, file may not play back correctly."); + sample_rate = 0; + } + + s_wb32(s, sample_rate << 16); // samplerate + + /* MOV-only data: https://developer.apple.com/documentation/quicktime-file-format/sound_sample_description_version_1 */ + if (is_mov && version == 1) { + size_t frame_size = obs_encoder_get_frame_size(track->encoder); + s_wb32(s, is_pcm ? 1 : (uint32_t)frame_size); // frame size + s_wb32(s, is_pcm ? track->sample_size / channels : 0); // bytes per packet + s_wb32(s, is_pcm ? 
track->sample_size : 0); // bytes per frame + s_wb32(s, 2); // bytes per sample, 2 for anything but 8-bit + } + } } /// 12.2.4 Channel layout @@ -1059,6 +1173,103 @@ static size_t mp4_write_Opus(struct mp4_mux *mux, struct mp4_track *track, uint8 return write_box_size(s, start); } +/// (QTFF/Apple) siDecompressionParam Atom ('wave') +static size_t mp4_write_wave(struct mp4_mux *mux, struct mp4_track *track, const char tag[4]) +{ + struct serializer *s = mux->serializer; + int64_t start = serializer_get_pos(s); + + write_box(s, 0, "wave"); + + /* frma atom containing codec tag (again) */ + s_wb32(s, 12); + s_write(s, "frma", 4); + s_write(s, tag, 4); + + if (track->codec == CODEC_AAC) { + mp4_write_esds(mux, track); + } else if (track->codec == CODEC_ALAC) { + uint8_t *extradata; + size_t extradata_size; + + if (obs_encoder_get_extra_data(track->encoder, &extradata, &extradata_size)) { + /* Apple Lossless Magic Cookie */ + s_write(s, extradata, extradata_size); + } + } + + /* Terminator atom */ + s_wb32(s, 8); // size + s_wb32(s, 0); // NULL name + + return write_box_size(s, start); +} + +/// (QTFF/Apple) Audio Channel Layout Atom (‘chan’) +static size_t mp4_write_chan(struct mp4_mux *mux, struct mp4_track *track) +{ + struct serializer *s = mux->serializer; + int64_t start = serializer_get_pos(s); + + audio_t *audio = obs_encoder_audio(track->encoder); + const struct audio_output_info *info = audio_output_get_info(audio); + uint32_t layout = get_mov_channel_layout(track->codec, info->speakers); + uint32_t bitmap = layout == kAudioChannelLayoutTag_UseChannelBitmap ? get_mov_channel_bitmap(info->speakers) + : 0; + if (layout == kAudioChannelLayoutTag_UseChannelBitmap && !bitmap) { + warn("No valid speaker layout found, not writing chan box. 
File may not play back correctly!"); + return 0; + } + + write_fullbox(s, 0, "chan", 0, 0); + /* AudioChannelLayout from CoreAudioTypes.h */ + s_wb32(s, layout); // mChannelLayoutTag + s_wb32(s, bitmap); // mChannelBitmap + s_wb32(s, 0); // mNumberChannelDescriptions + + return write_box_size(s, start); +} + +/// (QTFF/Apple) Sound Sample Description (v1 and v2) +static size_t mp4_write_mov_audio_tag(struct mp4_mux *mux, struct mp4_track *track) +{ + struct serializer *s = mux->serializer; + int64_t start = serializer_get_pos(s); + + const char *tag = NULL; + audio_t *audio = obs_encoder_audio(track->encoder); + uint32_t sample_rate = audio_output_get_sample_rate(audio); + size_t channels = audio_output_get_channels(audio); + /* More than 2 channels or sample rates above 65535 Hz require v2 */ + uint8_t version = (channels > 2 || sample_rate > UINT16_MAX) ? 2 : 1; + + if (track->codec == CODEC_PCM_F32 || track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24) { + tag = "lpcm"; + version = 2; /* lpcm also requires v2 */ + } else if (track->codec == CODEC_AAC) { + tag = "mp4a"; + } else if (track->codec == CODEC_ALAC) { + tag = "alac"; + } + + /* Unsupported/Unknown codec */ + if (!tag) + return 0; + + write_box(s, 0, tag); + + mp4_write_audio_sample_entry(mux, track, version); + + // wave + if (version == 1) + mp4_write_wave(mux, track, tag); + + // chan + mp4_write_chan(mux, track); + + return write_box_size(s, start); +} + /// 8.5.2 Sample Description Box static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track) { @@ -1069,7 +1280,7 @@ static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track) * but in practice that doesn't appear to matter. */ uint8_t version = 0; - if (track->type == TRACK_AUDIO) { + if (track->type == TRACK_AUDIO && mux->flavor != FLAVOR_MOV) { audio_t *audio = obs_encoder_audio(track->encoder); version = audio_output_get_channels(audio) > 2 ? 
1 : 0; } @@ -1086,18 +1297,24 @@ static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track) mp4_write_hvc1(mux, track->encoder); else if (track->codec == CODEC_AV1) mp4_write_av01(mux, track->encoder); + else if (track->codec == CODEC_PRORES) + mp4_write_prores(mux, track->encoder); } else if (track->type == TRACK_AUDIO) { - if (track->codec == CODEC_AAC) - mp4_write_mp4a(mux, track, version); - else if (track->codec == CODEC_OPUS) - mp4_write_Opus(mux, track, version); - else if (track->codec == CODEC_FLAC) - mp4_write_fLaC(mux, track, version); - else if (track->codec == CODEC_ALAC) - mp4_write_alac(mux, track, version); - else if (track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24 || - track->codec == CODEC_PCM_F32) - mp4_write_xpcm(mux, track, version); + if (mux->flavor == FLAVOR_MOV) { + mp4_write_mov_audio_tag(mux, track); + } else { + if (track->codec == CODEC_AAC) + mp4_write_mp4a(mux, track, version); + else if (track->codec == CODEC_OPUS) + mp4_write_Opus(mux, track, version); + else if (track->codec == CODEC_FLAC) + mp4_write_fLaC(mux, track, version); + else if (track->codec == CODEC_ALAC) + mp4_write_alac(mux, track, version); + else if (track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24 || + track->codec == CODEC_PCM_F32) + mp4_write_xpcm(mux, track, version); + } } else if (track->type == TRACK_CHAPTERS) { mp4_write_text(mux); } @@ -1419,8 +1636,8 @@ static size_t mp4_write_stbl(struct mp4_mux *mux, struct mp4_track *track, bool // stts mp4_write_stts(mux, track, fragmented); - // stss (non-fragmented only) - if (track->type == TRACK_VIDEO && !fragmented) + // stss (non-fragmented/non-prores only) + if (track->type == TRACK_VIDEO && !fragmented && track->codec != CODEC_PRORES) mp4_write_stss(mux, track); // ctts (non-fragmented only) @@ -1510,6 +1727,10 @@ static size_t mp4_write_minf(struct mp4_mux *mux, struct mp4_track *track, bool else mp4_write_smhd(mux); + // hdlr for dinf, required in MOV only + if 
(mux->flavor == FLAVOR_MOV) + mp4_write_hdlr(mux, NULL); + // dinf, unnecessary but mandatory mp4_write_dinf(mux); @@ -1764,6 +1985,22 @@ static size_t mp4_write_data_atom(struct mp4_mux *mux, const char *data) return size; } +/// (QTFF/Apple) String atom +static size_t mp4_write_string_data_atom(struct mp4_mux *mux, const char name[4], const char *data) +{ + struct serializer *s = mux->serializer; + int64_t start = serializer_get_pos(s); + + uint16_t len = (uint16_t)strlen(data); + + write_box(s, 0, name); + s_wb16(s, len); // String length + s_write(s, "\x55\xC4", 2); // language code, just using undefined + s_write(s, data, len); // Note: No NULL terminator + + return write_box_size(s, start); +} + /// (QTFF/Apple) Metadata item atom static size_t mp4_write_ilst_item_atom(struct mp4_mux *mux, const char name[4], const char *value) { @@ -1939,8 +2176,21 @@ static size_t mp4_write_udta(struct mp4_mux *mux) /* Normally metadata would be directly in the moov, but since this is * Apple/QTFF format metadata it is inside udta. 
*/ - // meta - mp4_write_meta(mux); + if (mux->flavor == FLAVOR_MOV && !(mux->flags & MP4_USE_MDTA_KEY_VALUE)) { + // keys directly in udta atom + struct dstr value = {0}; + + /* Encoder name */ + dstr_cat(&value, "OBS Studio ("); + dstr_cat(&value, obs_get_version_string()); + dstr_cat(&value, ")"); + mp4_write_string_data_atom(mux, "\251swr", value.array); + + dstr_free(&value); + } else { + // meta + mp4_write_meta(mux); + } return write_box_size(s, start); } @@ -2489,6 +2739,8 @@ static inline enum mp4_codec get_codec(obs_encoder_t *enc) return CODEC_HEVC; if (strcmp(codec, "av1") == 0) return CODEC_AV1; + if (strcmp(codec, "prores") == 0) + return CODEC_PRORES; if (strcmp(codec, "aac") == 0) return CODEC_AAC; if (strcmp(codec, "opus") == 0) @@ -2582,16 +2834,24 @@ static inline void free_track(struct mp4_track *track) /* ===========================================================================*/ /* API */ -struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags) +struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags, + enum mp4_flavor flavor) { struct mp4_mux *mux = bzalloc(sizeof(struct mp4_mux)); mux->output = output; mux->serializer = serializer; mux->flags = flags; + mux->flavor = flavor; /* Timestamp is based on 1904 rather than 1970. */ mux->creation_time = time(NULL) + 0x7C25B080; + if (flavor == FLAVOR_MOV && mux->creation_time > UINT32_MAX) { + /* This will only happen in 2040 but better safe than sorry! 
*/ + warn("Creation time too large for MOV, setting to 0 (unset)."); + mux->creation_time = 0; + } + for (size_t i = 0; i < MAX_OUTPUT_VIDEO_ENCODERS; i++) { obs_encoder_t *enc = obs_output_get_video_encoder2(output, i); if (!enc) @@ -2657,6 +2917,8 @@ bool mp4_mux_submit_packet(struct mp4_mux *mux, struct encoder_packet *pkt) obs_parse_hevc_packet(&parsed_packet, pkt); else if (track->codec == CODEC_AV1) obs_parse_av1_packet(&parsed_packet, pkt); + else if (track->codec == CODEC_PRORES) + obs_encoder_packet_ref(&parsed_packet, pkt); /* Set fragmentation PTS if packet is keyframe and PTS > 0 */ if (parsed_packet.keyframe && parsed_packet.pts > 0) { @@ -2705,7 +2967,7 @@ bool mp4_mux_finalise(struct mp4_mux *mux) info("Number of fragments: %u", mux->fragments_written); if (mux->flags & MP4_SKIP_FINALISATION) { - warn("Skipping MP4 finalization!"); + warn("Skipping finalization!"); return true; } diff --git a/plugins/obs-outputs/mp4-mux.h b/plugins/obs-outputs/mp4-mux.h index ccb7b7f4b..bf77812b1 100644 --- a/plugins/obs-outputs/mp4-mux.h +++ b/plugins/obs-outputs/mp4-mux.h @@ -22,6 +22,13 @@ struct mp4_mux; +/* Flavor for target compatibility */ +enum mp4_flavor { + FLAVOR_MP4, /* ISO/IEC 14496-12 */ + FLAVOR_MOV, /* Apple QuickTime */ + FLAVOR_CMAF, /* ISO/IEC 23000-19 (not yet implemented) */ +}; + enum mp4_mux_flags { /* Uses mdta key/value list for metadata instead of QuickTime keys */ MP4_USE_MDTA_KEY_VALUE = 1 << 0, @@ -33,7 +40,8 @@ enum mp4_mux_flags { MP4_USE_NEGATIVE_CTS = 1 << 3, }; -struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags); +struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags, + enum mp4_flavor flavor); void mp4_mux_destroy(struct mp4_mux *mux); bool mp4_mux_submit_packet(struct mp4_mux *mux, struct encoder_packet *pkt); bool mp4_mux_add_chapter(struct mp4_mux *mux, int64_t dts_usec, const char *name); diff --git 
a/plugins/obs-outputs/mp4-output.c b/plugins/obs-outputs/mp4-output.c index 261e04786..cc68b6587 100644 --- a/plugins/obs-outputs/mp4-output.c +++ b/plugins/obs-outputs/mp4-output.c @@ -28,8 +28,9 @@ #include -#define do_log(level, format, ...) \ - blog(level, "[mp4 output: '%s'] " format, obs_output_get_name(out->output), ##__VA_ARGS__) +#define do_log(level, format, ...) \ + blog(level, "[%s output: '%s'] " format, out->muxer_flavor == FLAVOR_MOV ? "mov" : "mp4", \ + obs_output_get_name(out->output), ##__VA_ARGS__) #define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__) #define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__) @@ -58,6 +59,7 @@ struct mp4_output { pthread_mutex_t mutex; struct mp4_mux *muxer; + enum mp4_flavor muxer_flavor; int flags; size_t chapter_ctr; @@ -140,6 +142,12 @@ static const char *mp4_output_name(void *unused) return obs_module_text("MP4Output"); } +static const char *mov_output_name(void *unused) +{ + UNUSED_PARAMETER(unused); + return obs_module_text("MOVOutput"); +} + static void mp4_clear_chapters(struct mp4_output *out) { while (out->chapters.size) { @@ -233,10 +241,11 @@ static void split_file_proc(void *data, calldata_t *cd) os_atomic_set_bool(&out->manual_split, true); } -static void *mp4_output_create(obs_data_t *settings, obs_output_t *output) +static void *mp4_output_create_internal(obs_data_t *settings, obs_output_t *output, enum mp4_flavor flavor) { struct mp4_output *out = bzalloc(sizeof(struct mp4_output)); out->output = output; + out->muxer_flavor = flavor; pthread_mutex_init(&out->mutex, NULL); signal_handler_t *sh = obs_output_get_signal_handler(output); @@ -250,6 +259,16 @@ static void *mp4_output_create(obs_data_t *settings, obs_output_t *output) return out; } +static void *mp4_output_create(obs_data_t *settings, obs_output_t *output) +{ + return mp4_output_create_internal(settings, output, FLAVOR_MP4); +} + +static void *mov_output_create(obs_data_t *settings, obs_output_t *output) +{ + 
return mp4_output_create_internal(settings, output, FLAVOR_MOV); +} + static inline void apply_flag(int *flags, const char *value, int flag_value) { if (atoi(value)) @@ -325,7 +344,7 @@ static bool mp4_output_start(void *data) obs_data_release(settings); if (!buffered_file_serializer_init(&out->serializer, out->path.array, out->buffer_size, out->chunk_size)) { - warn("Unable to open MP4 file '%s'", out->path.array); + warn("Unable to open file '%s'", out->path.array); return false; } @@ -333,11 +352,11 @@ static bool mp4_output_start(void *data) obs_output_add_packet_callback(out->output, mp4_pkt_callback, (void *)out); /* Initialise muxer and start capture */ - out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags); + out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags, out->muxer_flavor); os_atomic_set_bool(&out->active, true); obs_output_begin_data_capture(out->output, 0); - info("Writing Hybrid MP4 file '%s'...", out->path.array); + info("Writing Hybrid MP4/MOV file '%s'...", out->path.array); return true; } @@ -436,18 +455,18 @@ static bool change_file(struct mp4_output *out, struct encoder_packet *pkt) mp4_mux_destroy(out->muxer); mp4_clear_chapters(out); - info("MP4 file split complete. Finalization took %" PRIu64 " ms.", (os_gettime_ns() - start_time) / 1000000); + info("File split complete. 
Finalization took %" PRIu64 " ms.", (os_gettime_ns() - start_time) / 1000000); /* open new file */ generate_filename(out, &out->path, out->allow_overwrite); info("Changing output file to '%s'", out->path.array); if (!buffered_file_serializer_init(&out->serializer, out->path.array, out->buffer_size, out->chunk_size)) { - warn("Unable to open MP4 file '%s'", out->path.array); + warn("Unable to open file '%s'", out->path.array); return false; } - out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags); + out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags, out->muxer_flavor); calldata_t cd = {0}; signal_handler_t *sh = obs_output_get_signal_handler(out->output); @@ -500,7 +519,7 @@ static void mp4_output_actual_stop(struct mp4_output *out, int code) /* Clear chapter data */ mp4_clear_chapters(out); - info("MP4 file output complete. Finalization took %" PRIu64 " ms.", (os_gettime_ns() - start_time) / 1000000); + info("File output complete. Finalization took %" PRIu64 " ms.", (os_gettime_ns() - start_time) / 1000000); } static void push_back_packet(struct mp4_output *out, struct encoder_packet *packet) @@ -624,7 +643,7 @@ struct obs_output_info mp4_output_info = { .id = "mp4_output", .flags = OBS_OUTPUT_AV | OBS_OUTPUT_ENCODED | OBS_OUTPUT_MULTI_TRACK_AV | OBS_OUTPUT_CAN_PAUSE, .encoded_video_codecs = "h264;hevc;av1", - .encoded_audio_codecs = "aac", + .encoded_audio_codecs = "aac;alac;flac;opus", .get_name = mp4_output_name, .create = mp4_output_create, .destroy = mp4_output_destroy, @@ -634,3 +653,18 @@ struct obs_output_info mp4_output_info = { .get_properties = mp4_output_properties, .get_total_bytes = mp4_output_total_bytes, }; + +struct obs_output_info mov_output_info = { + .id = "mov_output", + .flags = OBS_OUTPUT_AV | OBS_OUTPUT_ENCODED | OBS_OUTPUT_MULTI_TRACK_AV | OBS_OUTPUT_CAN_PAUSE, + .encoded_video_codecs = "h264;hevc;prores", + .encoded_audio_codecs = "aac;alac", + .get_name = mov_output_name, + .create = 
mov_output_create, + .destroy = mp4_output_destroy, + .start = mp4_output_start, + .stop = mp4_output_stop, + .encoded_packet = mp4_output_packet, + .get_properties = mp4_output_properties, + .get_total_bytes = mp4_output_total_bytes, +}; diff --git a/plugins/obs-outputs/obs-outputs.c b/plugins/obs-outputs/obs-outputs.c index c3adc5d2f..02371260c 100644 --- a/plugins/obs-outputs/obs-outputs.c +++ b/plugins/obs-outputs/obs-outputs.c @@ -16,6 +16,7 @@ extern struct obs_output_info rtmp_output_info; extern struct obs_output_info null_output_info; extern struct obs_output_info flv_output_info; extern struct obs_output_info mp4_output_info; +extern struct obs_output_info mov_output_info; #if defined(_WIN32) && defined(MBEDTLS_THREADING_ALT) void mbed_mutex_init(mbedtls_threading_mutex_t *m) @@ -63,6 +64,7 @@ bool obs_module_load(void) obs_register_output(&null_output_info); obs_register_output(&flv_output_info); obs_register_output(&mp4_output_info); + obs_register_output(&mov_output_info); return true; }