mirror of
https://github.com/spacedriveapp/spacedrive.git
synced 2026-04-17 21:17:08 -04:00
263 lines
7.8 KiB
Rust
263 lines
7.8 KiB
Rust
//! Audio decoding module for extracting PCM samples from media files
|
|
|
|
use crate::{
|
|
codec_ctx::FFmpegCodecContext,
|
|
error::{Error, FFmpegError},
|
|
format_ctx::FFmpegFormatContext,
|
|
packet::FFmpegPacket,
|
|
utils::from_path,
|
|
video_frame::FFmpegFrame,
|
|
};
|
|
|
|
use std::{path::Path, slice};
|
|
|
|
use ffmpeg_sys_next::{av_read_frame, avcodec_find_decoder, AVFrame, AVMediaType, AVSampleFormat};
|
|
|
|
/// Extract audio samples from a media file as 16kHz mono f32 PCM
///
/// Pipeline: open the container, locate the first audio stream, decode its
/// packets into interleaved f32 PCM via `extract_and_convert_frame`, then
/// resample to 16 kHz mono via `resample_audio` unless the source already
/// matches that rate/layout.
///
/// # Errors
/// Fails if the file cannot be opened or probed, no audio stream or decoder
/// is found, the codec context cannot be configured, or a decoded frame uses
/// an unsupported sample format.
pub fn extract_audio_samples(filename: impl AsRef<Path>) -> Result<Vec<f32>, Error> {
	let filename = filename.as_ref();

	unsafe {
		// Open the container and probe stream metadata.
		let mut format_ctx = FFmpegFormatContext::open_file(from_path(filename)?.as_c_str())?;
		format_ctx.find_stream_info()?;

		// Find the best audio stream
		let audio_stream_index =
			find_best_audio_stream(format_ctx.as_ref()).ok_or(FFmpegError::StreamNotFound)?;

		let audio_stream = format_ctx
			.stream(audio_stream_index as u32)
			.ok_or(FFmpegError::StreamNotFound)?;

		// Get codec parameters
		let codecpar = audio_stream
			.codecpar
			.as_ref()
			.ok_or(FFmpegError::NullError)?;

		// Find decoder
		let decoder = avcodec_find_decoder(codecpar.codec_id)
			.as_ref()
			.ok_or(FFmpegError::DecoderNotFound)?;

		// Create a codec context, copy the stream's parameters into it,
		// then open it with the chosen decoder.
		let mut codec_ctx = FFmpegCodecContext::new()?;
		codec_ctx.parameters_to_context(codecpar)?;
		codec_ctx.open2(decoder)?;

		// Allocate packet and frame using RAII wrappers for automatic cleanup
		let mut packet = FFmpegPacket::new()?;
		let mut frame = FFmpegFrame::new()?;

		// Decoded samples, interleaved, still at the source rate/layout.
		let mut samples = Vec::new();

		// Read and decode packets until av_read_frame reports EOF/error (< 0).
		while av_read_frame(format_ctx.as_mut(), packet.as_ptr()) >= 0 {
			let pkt = packet.as_ref().ok_or(FFmpegError::NullError)?;

			if pkt.stream_index == audio_stream_index {
				// Send packet to decoder. A failed send is treated as
				// best-effort: the packet is unreferenced and skipped.
				if codec_ctx.send_packet(packet.as_ptr()).is_err() {
					packet.unref();
					continue;
				}

				// Drain every frame the decoder produced for this packet.
				loop {
					match codec_ctx.receive_frame(frame.as_mut()) {
						Ok(true) => {
							// Extract samples from this frame
							let frame_samples = extract_and_convert_frame(frame.as_ref())?;
							samples.extend_from_slice(&frame_samples);
						}
						// No more frames available right now: next packet.
						Ok(false) | Err(FFmpegError::Again) => break,
						Err(e) => {
							// RAII wrappers handle cleanup automatically via Drop
							return Err(e.into());
						}
					}
				}
			}

			// Release the packet's buffer so it can be reused next iteration.
			packet.unref();
		}

		// NOTE(review): the decoder is never flushed after EOF (no draining
		// send with a NULL packet followed by receive_frame until exhausted),
		// so frames still buffered inside the decoder are dropped — confirm
		// whether losing the trailing samples matters for this use case.

		// RAII wrappers handle cleanup automatically when they go out of scope

		// Now resample to 16kHz mono if needed
		let codec_ref = codec_ctx.as_ref();
		let in_sample_rate = codec_ref.sample_rate;
		let in_channels = codec_ref.ch_layout.nb_channels;

		let final_samples = if in_sample_rate != 16000 || in_channels != 1 {
			resample_audio(&samples, in_sample_rate, in_channels, 16000, 1)?
		} else {
			samples
		};

		Ok(final_samples)
	}
}
|
|
|
|
/// Find the best audio stream in a format context
|
|
unsafe fn find_best_audio_stream(format_ctx: &ffmpeg_sys_next::AVFormatContext) -> Option<i32> {
|
|
let streams = format_ctx.streams;
|
|
if streams.is_null() {
|
|
return None;
|
|
}
|
|
|
|
for i in 0..format_ctx.nb_streams {
|
|
let stream = (*streams.add(i as usize)).as_ref()?;
|
|
let codecpar = stream.codecpar.as_ref()?;
|
|
|
|
if codecpar.codec_type == AVMediaType::AVMEDIA_TYPE_AUDIO {
|
|
return Some(i as i32);
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Extract and convert audio frame to f32 samples
|
|
unsafe fn extract_and_convert_frame(frame: &AVFrame) -> Result<Vec<f32>, Error> {
|
|
let nb_samples = frame.nb_samples as usize;
|
|
let channels = frame.ch_layout.nb_channels as usize;
|
|
let format = frame.format;
|
|
|
|
match format {
|
|
f if f == AVSampleFormat::AV_SAMPLE_FMT_FLT as i32 => {
|
|
// Interleaved f32 - perfect, just copy
|
|
let data = slice::from_raw_parts(frame.data[0] as *const f32, nb_samples * channels);
|
|
Ok(data.to_vec())
|
|
}
|
|
f if f == AVSampleFormat::AV_SAMPLE_FMT_FLTP as i32 => {
|
|
// Planar f32 - interleave it
|
|
let mut output = Vec::with_capacity(nb_samples * channels);
|
|
for i in 0..nb_samples {
|
|
for ch in 0..channels {
|
|
let channel_data =
|
|
slice::from_raw_parts(frame.data[ch] as *const f32, nb_samples);
|
|
output.push(channel_data[i]);
|
|
}
|
|
}
|
|
Ok(output)
|
|
}
|
|
f if f == AVSampleFormat::AV_SAMPLE_FMT_S16 as i32 => {
|
|
// Interleaved s16 - convert to f32
|
|
let data = slice::from_raw_parts(frame.data[0] as *const i16, nb_samples * channels);
|
|
Ok(data.iter().map(|&s| s as f32 / 32768.0).collect())
|
|
}
|
|
f if f == AVSampleFormat::AV_SAMPLE_FMT_S16P as i32 => {
|
|
// Planar s16 - interleave and convert
|
|
let mut output = Vec::with_capacity(nb_samples * channels);
|
|
for i in 0..nb_samples {
|
|
for ch in 0..channels {
|
|
let channel_data =
|
|
slice::from_raw_parts(frame.data[ch] as *const i16, nb_samples);
|
|
output.push(channel_data[i] as f32 / 32768.0);
|
|
}
|
|
}
|
|
Ok(output)
|
|
}
|
|
f if f == AVSampleFormat::AV_SAMPLE_FMT_S32 as i32 => {
|
|
// Interleaved s32 - convert to f32
|
|
let data = slice::from_raw_parts(frame.data[0] as *const i32, nb_samples * channels);
|
|
Ok(data.iter().map(|&s| s as f32 / 2147483648.0).collect())
|
|
}
|
|
f if f == AVSampleFormat::AV_SAMPLE_FMT_S32P as i32 => {
|
|
// Planar s32 - interleave and convert
|
|
let mut output = Vec::with_capacity(nb_samples * channels);
|
|
for i in 0..nb_samples {
|
|
for ch in 0..channels {
|
|
let channel_data =
|
|
slice::from_raw_parts(frame.data[ch] as *const i32, nb_samples);
|
|
output.push(channel_data[i] as f32 / 2147483648.0);
|
|
}
|
|
}
|
|
Ok(output)
|
|
}
|
|
_ => Err(FFmpegError::UnsupportedFormat.into()),
|
|
}
|
|
}
|
|
|
|
/// Simple resampling using linear interpolation
|
|
/// For production, this should use a proper resampling library
|
|
fn resample_audio(
|
|
samples: &[f32],
|
|
in_rate: i32,
|
|
in_channels: i32,
|
|
out_rate: i32,
|
|
out_channels: i32,
|
|
) -> Result<Vec<f32>, Error> {
|
|
if samples.is_empty() {
|
|
return Ok(Vec::new());
|
|
}
|
|
|
|
let in_rate = in_rate as usize;
|
|
let out_rate = out_rate as usize;
|
|
let in_channels = in_channels as usize;
|
|
let out_channels = out_channels as usize;
|
|
|
|
let in_frames = samples.len() / in_channels;
|
|
let out_frames = (in_frames * out_rate + in_rate - 1) / in_rate;
|
|
|
|
let mut output = Vec::with_capacity(out_frames * out_channels);
|
|
|
|
for out_frame_idx in 0..out_frames {
|
|
// Calculate corresponding input frame (with fractional part)
|
|
let in_frame_pos = (out_frame_idx * in_rate) as f32 / out_rate as f32;
|
|
let in_frame_idx = in_frame_pos as usize;
|
|
let frac = in_frame_pos - in_frame_idx as f32;
|
|
|
|
// For each output channel
|
|
for out_ch in 0..out_channels {
|
|
let mut sample = 0.0f32;
|
|
|
|
if in_channels == out_channels {
|
|
// Same channel count - just resample
|
|
let in_ch = out_ch;
|
|
|
|
if in_frame_idx + 1 < in_frames {
|
|
let s1 = samples[in_frame_idx * in_channels + in_ch];
|
|
let s2 = samples[(in_frame_idx + 1) * in_channels + in_ch];
|
|
sample = s1 * (1.0 - frac) + s2 * frac;
|
|
} else if in_frame_idx < in_frames {
|
|
sample = samples[in_frame_idx * in_channels + in_ch];
|
|
}
|
|
} else if in_channels > out_channels {
|
|
// Downmix (e.g., stereo to mono) - average channels
|
|
let mut sum = 0.0f32;
|
|
let mut count = 0;
|
|
|
|
for in_ch in 0..in_channels {
|
|
if in_frame_idx + 1 < in_frames {
|
|
let s1 = samples[in_frame_idx * in_channels + in_ch];
|
|
let s2 = samples[(in_frame_idx + 1) * in_channels + in_ch];
|
|
sum += s1 * (1.0 - frac) + s2 * frac;
|
|
count += 1;
|
|
} else if in_frame_idx < in_frames {
|
|
sum += samples[in_frame_idx * in_channels + in_ch];
|
|
count += 1;
|
|
}
|
|
}
|
|
|
|
sample = if count > 0 { sum / count as f32 } else { 0.0 };
|
|
} else {
|
|
// Upmix (e.g., mono to stereo) - duplicate channel
|
|
let in_ch = 0;
|
|
|
|
if in_frame_idx + 1 < in_frames {
|
|
let s1 = samples[in_frame_idx * in_channels + in_ch];
|
|
let s2 = samples[(in_frame_idx + 1) * in_channels + in_ch];
|
|
sample = s1 * (1.0 - frac) + s2 * frac;
|
|
} else if in_frame_idx < in_frames {
|
|
sample = samples[in_frame_idx * in_channels + in_ch];
|
|
}
|
|
}
|
|
|
|
output.push(sample);
|
|
}
|
|
}
|
|
|
|
Ok(output)
|
|
}
|