mirror of
https://github.com/LMMS/lmms.git
synced 2026-03-10 10:10:02 -04:00
experimental support for MMX/SSE/SSE2 instructions
git-svn-id: https://lmms.svn.sf.net/svnroot/lmms/trunk/lmms@1832 0778d3d1-df1d-0410-868b-ea421aaaa00d
This commit is contained in:
@@ -70,6 +70,7 @@ ENDIF(LMMS_BUILD_WIN32)
|
||||
|
||||
|
||||
CHECK_INCLUDE_FILES(stdint.h LMMS_HAVE_STDINT_H)
|
||||
CHECK_INCLUDE_FILES(stdbool.h LMMS_HAVE_STDBOOL_H)
|
||||
CHECK_INCLUDE_FILES(stdlib.h LMMS_HAVE_STDLIB_H)
|
||||
CHECK_INCLUDE_FILES(pthread.h LMMS_HAVE_PTHREAD_H)
|
||||
CHECK_INCLUDE_FILES(semaphore.h LMMS_HAVE_SEMAPHORE_H)
|
||||
@@ -387,6 +388,43 @@ SET(LMMS_ER_H ${CMAKE_CURRENT_BINARY_DIR}/embedded_resources.h)
|
||||
ADD_FILE_DEPENDENCIES(${CMAKE_BINARY_DIR}/lmmsconfig.h ${lmms_MOC_out})
|
||||
|
||||
ADD_CUSTOM_COMMAND(OUTPUT ${LMMS_ER_H} COMMAND ${BIN2RES} ARGS ${lmms_EMBEDDED_RESOURCES} > ${LMMS_ER_H} DEPENDS ${BIN2RES})
|
||||
# C source from which the per-instruction-set assembly files are generated.
SET(BASIC_OPS_X86_C "${CMAKE_SOURCE_DIR}/src/core/basic_ops_x86.c")

# x86-only: build one object per SIMD target (MMX/SSE/SSE2) from the assembly
# files kept in src/core/ and add them to the lmms sources.
IF(LMMS_HOST_X86 OR LMMS_HOST_X86_64)

	# Umbrella target; depends on every regen-basic-ops-<target> defined below.
	ADD_CUSTOM_TARGET(regen-basic-ops)

	# 32-bit hosts additionally get an MMX variant.
	IF(LMMS_HOST_X86)
		SET(opt_targets mmx sse sse2)
		SET(host_arch x86)
	ELSE(LMMS_HOST_X86)
		SET(opt_targets sse sse2)
		SET(host_arch x86_64)
	ENDIF(LMMS_HOST_X86)

	# Compiler used by the regen-* targets only.
	# To be used by maintainer with special ultra-optimizing super duper GCC:
	# SVN_C_COMPILER is read from the environment at configure time and is only
	# honoured if it names an existing file; otherwise the regular C compiler
	# is used. This does not depend on the loop variable, so decide it once.
	IF(EXISTS "$ENV{SVN_C_COMPILER}")
		SET(C_COMPILER "$ENV{SVN_C_COMPILER}")
	ELSE(EXISTS "$ENV{SVN_C_COMPILER}")
		SET(C_COMPILER "${CMAKE_C_COMPILER}")
	ENDIF(EXISTS "$ENV{SVN_C_COMPILER}")

	# Start from an empty list so stale values from an outer scope or an
	# earlier include cannot end up in lmms_SOURCES.
	SET(opt_target_objects "")

	FOREACH(opt_target ${opt_targets})

		STRING(TOUPPER "${opt_target}" OPT_TARGET)

		# Assembly lives in the source tree, the object in the build tree.
		SET(BASIC_OPS_X86_TARGET_S "${CMAKE_SOURCE_DIR}/src/core/basic_ops_${host_arch}_${opt_target}.s")
		SET(BASIC_OPS_X86_TARGET_O "${CMAKE_BINARY_DIR}/basic_ops_${host_arch}_${opt_target}.o")

		# Reset on every iteration: -mfpmath=sse must never be inherited by the
		# MMX build from a previous iteration or from an outer definition.
		SET(FPMATH_FLAGS "")
		IF(NOT "${OPT_TARGET}" STREQUAL "MMX")
			SET(FPMATH_FLAGS "-mfpmath=sse")
		ENDIF(NOT "${OPT_TARGET}" STREQUAL "MMX")

		# Maintainer target: recompile basic_ops_x86.c to assembly (-S) for this
		# instruction set and overwrite the .s file in the source tree.
		ADD_CUSTOM_TARGET(regen-basic-ops-${opt_target}
			COMMAND ${C_COMPILER}
				-O2 -ftree-vectorize -ftree-vectorizer-verbose=2
				-fomit-frame-pointer -c -S
				-I${CMAKE_SOURCE_DIR}/include -I${CMAKE_BINARY_DIR}
				-g0 -DBUILD_${OPT_TARGET} -m${opt_target} ${FPMATH_FLAGS}
				-o ${BASIC_OPS_X86_TARGET_S} ${BASIC_OPS_X86_C}
			DEPENDS ${BASIC_OPS_X86_C}
			VERBATIM)

		# Regular build rule: assemble the .s file into an object.
		ADD_CUSTOM_COMMAND(OUTPUT ${BASIC_OPS_X86_TARGET_O}
			COMMAND ${CMAKE_C_COMPILER}
				${BASIC_OPS_X86_TARGET_S} -c -o ${BASIC_OPS_X86_TARGET_O}
			DEPENDS ${BASIC_OPS_X86_TARGET_S}
			VERBATIM)

		ADD_DEPENDENCIES(regen-basic-ops regen-basic-ops-${opt_target})
		LIST(APPEND opt_target_objects ${BASIC_OPS_X86_TARGET_O})

	ENDFOREACH(opt_target ${opt_targets})

	# Hand the assembled objects to the lmms target.
	SET(lmms_SOURCES ${lmms_SOURCES} ${opt_target_objects})

ENDIF(LMMS_HOST_X86 OR LMMS_HOST_X86_64)
|
||||
|
||||
IF(WIN32)
|
||||
SET(WINRC "${CMAKE_BINARY_DIR}/lmmsrc.obj")
|
||||
|
||||
37
ChangeLog
37
ChangeLog
@@ -1,3 +1,40 @@
|
||||
2008-11-10 Tobias Doerffel <tobydox/at/users/dot/sourceforge/dot/net>
|
||||
|
||||
* include/audio_portaudio.h:
|
||||
* include/lmms_basics.h:
|
||||
* include/fifo_buffer.h:
|
||||
* include/mixer.h:
|
||||
* include/audio_port.h:
|
||||
* include/audio_dummy.h:
|
||||
* include/basic_ops.h:
|
||||
* include/audio_sdl.h:
|
||||
* include/audio_jack.h:
|
||||
* include/audio_device.h:
|
||||
* src/core/audio/audio_device.cpp:
|
||||
* src/core/audio/audio_alsa.cpp:
|
||||
* src/core/audio/audio_file_wave.cpp:
|
||||
* src/core/audio/audio_sdl.cpp:
|
||||
* src/core/audio/audio_oss.cpp:
|
||||
* src/core/audio/audio_port.cpp:
|
||||
* src/core/audio/audio_portaudio.cpp:
|
||||
* src/core/audio/audio_jack.cpp:
|
||||
* src/core/audio/audio_pulseaudio.cpp:
|
||||
* src/core/basic_ops.cpp:
|
||||
* src/core/basic_ops_x86.c:
|
||||
* src/core/basic_ops_x86_mmx.s:
|
||||
* src/core/basic_ops_x86_sse.s:
|
||||
* src/core/basic_ops_x86_sse2.s:
|
||||
* src/core/basic_ops_x86_64_sse.s:
|
||||
* src/core/basic_ops_x86_64_sse2.s:
|
||||
* src/core/mixer.cpp:
|
||||
* src/core/main.cpp:
|
||||
* src/core/project_renderer.cpp:
|
||||
* src/core/fx_mixer.cpp:
|
||||
* plugins/ladspa_effect/ladspa_effect.cpp:
|
||||
* lmmsconfig.h.in:
|
||||
* CMakeLists.txt:
|
||||
experimental support for MMX/SSE/SSE2 instructions
|
||||
|
||||
2008-11-04 Tobias Doerffel <tobydox/at/users/dot/sourceforge/dot/net>
|
||||
|
||||
* plugins/sf2_player/sf2_player.cpp:
|
||||
|
||||
@@ -121,31 +121,22 @@ public:
|
||||
protected:
|
||||
// subclasses can re-implement this for being used in conjunction with
|
||||
// processNextBuffer()
|
||||
virtual void writeBuffer( const surroundSampleFrame * /* _buf*/,
|
||||
virtual void writeBuffer( const sampleFrameA * /* _buf*/,
|
||||
const fpp_t /*_frames*/,
|
||||
const float /*_master_gain*/ )
|
||||
{
|
||||
}
|
||||
|
||||
// called by according driver for fetching new sound-data
|
||||
fpp_t getNextBuffer( surroundSampleFrame * _ab );
|
||||
|
||||
// convert a given audio-buffer to a buffer in signed 16-bit samples
|
||||
// returns num of bytes in outbuf
|
||||
Uint32 convertToS16( const surroundSampleFrame * _ab,
|
||||
const fpp_t _frames,
|
||||
const float _master_gain,
|
||||
int_sample_t * _output_buffer,
|
||||
const bool _convert_endian = FALSE );
|
||||
fpp_t getNextBuffer( sampleFrameA * _ab );
|
||||
|
||||
// clear given signed-int-16-buffer
|
||||
void clearS16Buffer( int_sample_t * _outbuf,
|
||||
const fpp_t _frames );
|
||||
void clearS16Buffer( intSampleFrameA * _outbuf, const fpp_t _frames );
|
||||
|
||||
// resample given buffer from samplerate _src_sr to samplerate _dst_sr
|
||||
void resample( const surroundSampleFrame * _src,
|
||||
void resample( const sampleFrameA * _src,
|
||||
const fpp_t _frames,
|
||||
surroundSampleFrame * _dst,
|
||||
sampleFrameA * _dst,
|
||||
const sample_rate_t _src_sr,
|
||||
const sample_rate_t _dst_sr );
|
||||
|
||||
@@ -161,9 +152,11 @@ protected:
|
||||
|
||||
bool hqAudio( void ) const;
|
||||
|
||||
|
||||
protected:
|
||||
bool m_supportsCapture;
|
||||
|
||||
|
||||
private:
|
||||
sample_rate_t m_sampleRate;
|
||||
ch_cnt_t m_channels;
|
||||
@@ -175,7 +168,7 @@ private:
|
||||
SRC_DATA m_srcData;
|
||||
SRC_STATE * m_srcState;
|
||||
|
||||
surroundSampleFrame * m_buffer;
|
||||
sampleFrameA * m_buffer;
|
||||
|
||||
} ;
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#define _AUDIO_DUMMY_H
|
||||
|
||||
#include "audio_device.h"
|
||||
#include "basic_ops.h"
|
||||
#include "micro_timer.h"
|
||||
|
||||
|
||||
@@ -94,16 +95,16 @@ private:
|
||||
virtual void run( void )
|
||||
{
|
||||
microTimer timer;
|
||||
while( TRUE )
|
||||
while( true )
|
||||
{
|
||||
timer.reset();
|
||||
const surroundSampleFrame * b =
|
||||
surroundSampleFrame * b =
|
||||
getMixer()->nextBuffer();
|
||||
if( !b )
|
||||
{
|
||||
break;
|
||||
}
|
||||
delete[] b;
|
||||
alignedFreeFrames( b );
|
||||
|
||||
const Sint32 microseconds = static_cast<Sint32>(
|
||||
getMixer()->framesPerPeriod() *
|
||||
|
||||
@@ -94,7 +94,7 @@ private:
|
||||
QSemaphore m_stop_semaphore;
|
||||
|
||||
QVector<jack_port_t *> m_outputPorts;
|
||||
surroundSampleFrame * m_outBuf;
|
||||
sampleFrameA * m_outBuf;
|
||||
|
||||
|
||||
f_cnt_t m_framesDoneInCurBuf;
|
||||
|
||||
@@ -40,14 +40,14 @@ public:
|
||||
audioPort( const QString & _name, bool _has_effect_chain = true );
|
||||
~audioPort();
|
||||
|
||||
inline sampleFrame * firstBuffer( void )
|
||||
inline sampleFrameA * firstBuffer( void )
|
||||
{
|
||||
return( m_firstBuffer );
|
||||
return m_firstBuffer;
|
||||
}
|
||||
|
||||
inline sampleFrame * secondBuffer( void )
|
||||
inline sampleFrameA * secondBuffer( void )
|
||||
{
|
||||
return( m_secondBuffer );
|
||||
return m_secondBuffer;
|
||||
}
|
||||
|
||||
inline void lockFirstBuffer( void )
|
||||
@@ -76,7 +76,7 @@ public:
|
||||
// indicate whether JACK & Co should provide output-buffer at ext. port
|
||||
inline bool extOutputEnabled( void ) const
|
||||
{
|
||||
return( m_extOutputEnabled );
|
||||
return m_extOutputEnabled;
|
||||
}
|
||||
|
||||
void setExtOutputEnabled( bool _enabled );
|
||||
@@ -86,12 +86,12 @@ public:
|
||||
// (-1 = none 0 = master)
|
||||
inline fx_ch_t nextFxChannel( void ) const
|
||||
{
|
||||
return( m_nextFxChannel );
|
||||
return m_nextFxChannel;
|
||||
}
|
||||
|
||||
inline effectChain * getEffects( void )
|
||||
{
|
||||
return( m_effects );
|
||||
return m_effects;
|
||||
}
|
||||
|
||||
void setNextFxChannel( const fx_ch_t _chnl )
|
||||
@@ -102,7 +102,7 @@ public:
|
||||
|
||||
const QString & name( void ) const
|
||||
{
|
||||
return( m_name );
|
||||
return m_name;
|
||||
}
|
||||
|
||||
void setName( const QString & _new_name );
|
||||
@@ -122,8 +122,8 @@ public:
|
||||
private:
|
||||
volatile bufferUsages m_bufferUsage;
|
||||
|
||||
sampleFrame * m_firstBuffer;
|
||||
sampleFrame * m_secondBuffer;
|
||||
sampleFrameA * m_firstBuffer;
|
||||
sampleFrameA * m_secondBuffer;
|
||||
QMutex m_firstBufferLock;
|
||||
QMutex m_secondBufferLock;
|
||||
|
||||
|
||||
@@ -140,7 +140,7 @@ private:
|
||||
|
||||
bool m_wasPAInitError;
|
||||
|
||||
surroundSampleFrame * m_outBuf;
|
||||
sampleFrameA * m_outBuf;
|
||||
int m_outBufPos;
|
||||
int m_outBufSize;
|
||||
|
||||
|
||||
@@ -76,8 +76,8 @@ private:
|
||||
|
||||
SDL_AudioSpec m_audioHandle;
|
||||
|
||||
surroundSampleFrame * m_outBuf;
|
||||
Uint8 * m_convertedBuf;
|
||||
sampleFrameA * m_outBuf;
|
||||
intSampleFrameA * m_convertedBuf;
|
||||
int m_convertedBufPos;
|
||||
int m_convertedBufSize;
|
||||
|
||||
|
||||
94
include/basic_ops.h
Normal file
94
include/basic_ops.h
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* basic_ops.h - basic memory operations
|
||||
*
|
||||
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program (see COPYING); if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
* Boston, MA 02110-1301 USA.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _BASIC_OPS_H
#define _BASIC_OPS_H

#include "lmms_basics.h"

// alignedConvertToS16Func below takes a bool argument, so pull in <stdbool.h>
// whenever the build system detected it
#ifdef LMMS_HAVE_STDBOOL_H
#include <stdbool.h>
#endif


// one-time setup of this module
// NOTE(review): presumably binds the function pointers declared further down
// to an implementation - confirm against the definition
void initBasicOps( void );


// raw allocation helpers: _bytes is a byte count, _buf a pointer previously
// handed out by alignedMalloc()
// NOTE(review): alignment is presumably ALIGN_SIZE - confirm in implementation
void * alignedMalloc( int _bytes );
void alignedFree( void * _buf );

// same as above, but sized in frames and typed as sampleFrameA
sampleFrameA * alignedAllocFrames( int _frames );
void alignedFreeFrames( sampleFrameA * _buf );


// Signatures of the replaceable buffer operations.
//
// all aligned* functions assume data to be 16 byte aligned and size to be
// multiples of 64
//
// RP (see lmms_basics.h) marks pointer arguments as restrict-qualified where
// the compiler supports it, i.e. _dst and _src must not overlap.

// copy _size bytes from _src to _dst
typedef void (*alignedMemCpyFunc)(
				void * RP _dst,
				const void * RP _src,
				int _size );

// clear _size bytes at _dst
typedef void (*alignedMemClearFunc)(
				void * RP _dst,
				int _size );

// apply _gain to _frames frames of _dst
typedef void (*alignedBufApplyGainFunc)(
				sampleFrameA * RP _dst,
				float _gain,
				int _frames );

// mix _frames frames of _src into _dst
typedef void (*alignedBufMixFunc)(
				sampleFrameA * RP _dst,
				const sampleFrameA * RP _src,
				int _frames );

// mix _src into _dst using separate coefficients for left and right channel
typedef void (*alignedBufMixLRCoeffFunc)(
				sampleFrameA * RP _dst,
				const sampleFrameA * RP _src,
				float _left,
				float _right,
				int _frames );

// variant of the above taking plain (not alignment-annotated) sampleFrame
// buffers
typedef void (*unalignedBufMixLRCoeffFunc)(
				sampleFrame * RP _dst,
				const sampleFrame * RP _src,
				float _left,
				float _right,
				int _frames );

// wet/dry mix of _src into _dst
// NOTE(review): presumably _wet scales _src and _dst is scaled by _dry -
// confirm in implementation
typedef void (*alignedBufWetDryMixFunc)(
				sampleFrameA * RP _dst,
				const sampleFrameA * RP _src,
				float _wet,
				float _dry,
				int _frames );

// wet/dry mix where the source is given as two separate per-channel float
// buffers (_left/_right) instead of interleaved frames
typedef void (*alignedBufWetDryMixSplittedFunc)(
				sampleFrameA * RP _dst,
				const float * RP _left,
				const float * RP _right,
				float _wet,
				float _dry,
				int _frames );

// convert _frames frames of _src to integer samples in _dst, with
// _master_gain and optional endianness conversion
// NOTE(review): the int result is presumably the number of bytes written -
// confirm in implementation
typedef int (*alignedConvertToS16Func)(
				const sampleFrameA * RP _src,
				intSampleFrameA * RP _dst,
				const fpp_t _frames,
				const float _master_gain,
				const bool _convert_endian );


// the operations themselves - call through these pointers
extern alignedMemCpyFunc alignedMemCpy;
extern alignedMemClearFunc alignedMemClear;
extern alignedBufApplyGainFunc alignedBufApplyGain;
extern alignedBufMixFunc alignedBufMix;
extern alignedBufMixLRCoeffFunc alignedBufMixLRCoeff;
extern unalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff;
extern alignedBufWetDryMixFunc alignedBufWetDryMix;
extern alignedBufWetDryMixSplittedFunc alignedBufWetDryMixSplitted;
extern alignedConvertToS16Func alignedConvertToS16;


// X86_OPTIMIZATIONS is defined for both 32 bit and 64 bit x86 hosts
#if defined( LMMS_HOST_X86 ) || defined( LMMS_HOST_X86_64 )
#define X86_OPTIMIZATIONS
#endif

#endif
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
* fifo_buffer.h - FIFO fixed-size buffer
|
||||
*
|
||||
* Copyright (c) 2007 Javier Serrano Polo <jasp00/at/users.sourceforge.net>
|
||||
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
@@ -33,50 +34,50 @@ class fifoBuffer
|
||||
{
|
||||
public:
|
||||
fifoBuffer( int _size ) :
|
||||
m_reader_sem( _size ),
|
||||
m_writer_sem( _size ),
|
||||
m_reader_index( 0 ),
|
||||
m_writer_index( 0 ),
|
||||
m_readerSem( _size ),
|
||||
m_writerSem( _size ),
|
||||
m_readerIndex( 0 ),
|
||||
m_writerIndex( 0 ),
|
||||
m_size( _size )
|
||||
{
|
||||
m_buffer = new T[_size];
|
||||
m_reader_sem.acquire( _size );
|
||||
m_readerSem.acquire( _size );
|
||||
}
|
||||
|
||||
~fifoBuffer()
|
||||
{
|
||||
delete[] m_buffer;
|
||||
m_reader_sem.release( m_size );
|
||||
m_readerSem.release( m_size );
|
||||
}
|
||||
|
||||
void write( T _element )
|
||||
{
|
||||
m_writer_sem.acquire();
|
||||
m_buffer[m_writer_index++] = _element;
|
||||
m_writer_index %= m_size;
|
||||
m_reader_sem.release();
|
||||
m_writerSem.acquire();
|
||||
m_buffer[m_writerIndex++] = _element;
|
||||
m_writerIndex %= m_size;
|
||||
m_readerSem.release();
|
||||
}
|
||||
|
||||
T read( void )
|
||||
{
|
||||
m_reader_sem.acquire();
|
||||
T element = m_buffer[m_reader_index++];
|
||||
m_reader_index %= m_size;
|
||||
m_writer_sem.release();
|
||||
return( element );
|
||||
m_readerSem.acquire();
|
||||
T element = m_buffer[m_readerIndex++];
|
||||
m_readerIndex %= m_size;
|
||||
m_writerSem.release();
|
||||
return element;
|
||||
}
|
||||
|
||||
bool available( void )
|
||||
{
|
||||
return( m_reader_sem.available() );
|
||||
return m_readerSem.available();
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
QSemaphore m_reader_sem;
|
||||
QSemaphore m_writer_sem;
|
||||
int m_reader_index;
|
||||
int m_writer_index;
|
||||
QSemaphore m_readerSem;
|
||||
QSemaphore m_writerSem;
|
||||
int m_readerIndex;
|
||||
int m_writerIndex;
|
||||
int m_size;
|
||||
T * m_buffer;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* types.h - typedefs for common types that are used in the whole app
|
||||
* lmms_basics.h - common basics for the whole App
|
||||
*
|
||||
* Copyright (c) 2004-2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
@@ -23,10 +23,8 @@
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _TYPES_H
|
||||
#define _TYPES_H
|
||||
|
||||
#include <limits>
|
||||
#ifndef _LMMS_BASICS_H
|
||||
#define _LMMS_BASICS_H
|
||||
|
||||
#include "lmmsconfig.h"
|
||||
|
||||
@@ -68,6 +66,9 @@ typedef Uint32 jo_id_t; // (unique) ID of a journalling object
|
||||
#define likely(x) __builtin_expect((x),1)
|
||||
#define unlikely(x) __builtin_expect((x),0)
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
#include <limits>
|
||||
|
||||
template<typename T>
|
||||
struct typeInfo
|
||||
@@ -115,25 +116,50 @@ inline bool typeInfo<float>::isEqual( float _x, float _y )
|
||||
return absVal( _x - _y ) < minEps();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
const ch_cnt_t DEFAULT_CHANNELS = 2;
|
||||
|
||||
const ch_cnt_t SURROUND_CHANNELS =
|
||||
#define DEFAULT_CHANNELS 2
|
||||
#define LMMS_DISABLE_SURROUND
|
||||
#ifndef LMMS_DISABLE_SURROUND
|
||||
4;
|
||||
#ifdef LMMS_DISABLE_SURROUND
|
||||
#define SURROUND_CHANNELS 2
|
||||
#else
|
||||
2;
|
||||
#define SURROUND_CHANNELS 4
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
typedef sample_t sampleFrame[DEFAULT_CHANNELS];
|
||||
typedef sample_t surroundSampleFrame[SURROUND_CHANNELS];
|
||||
|
||||
#define ALIGN_SIZE 16
|
||||
|
||||
#if __GNUC__
|
||||
|
||||
typedef sample_t sampleFrameA[DEFAULT_CHANNELS] __attribute__((__aligned__(ALIGN_SIZE)));
|
||||
typedef int_sample_t intSampleFrameA[DEFAULT_CHANNELS] __attribute__((__aligned__(ALIGN_SIZE)));
|
||||
#define RP __restrict__
|
||||
|
||||
#else
|
||||
|
||||
#define RP
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
const int BYTES_PER_SAMPLE = sizeof( sample_t );
|
||||
const int BYTES_PER_INT_SAMPLE = sizeof( int_sample_t );
|
||||
const int BYTES_PER_FRAME = sizeof( sampleFrame );
|
||||
const int BYTES_PER_SURROUND_FRAME = sizeof( surroundSampleFrame );
|
||||
|
||||
const float OUTPUT_SAMPLE_MULTIPLIER = 32767.0f;
|
||||
#else
|
||||
#define BYTES_PER_SAMPLE sizeof( sample_t )
|
||||
#define BYTES_PER_INT_SAMPLE sizeof( int_sample_t )
|
||||
#define BYTES_PER_FRAME sizeof( sampleFrame )
|
||||
#define BYTES_PER_SURROUND_FRAME sizeof( surroundSampleFrame )
|
||||
#define OUTPUT_SAMPLE_MULTIPLIER 32767.0f
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -57,13 +57,6 @@ class audioPort;
|
||||
|
||||
const fpp_t DEFAULT_BUFFER_SIZE = 256;
|
||||
|
||||
const int BYTES_PER_SAMPLE = sizeof( sample_t );
|
||||
const int BYTES_PER_INT_SAMPLE = sizeof( int_sample_t );
|
||||
const int BYTES_PER_FRAME = sizeof( sampleFrame );
|
||||
const int BYTES_PER_SURROUND_FRAME = sizeof( surroundSampleFrame );
|
||||
|
||||
const float OUTPUT_SAMPLE_MULTIPLIER = 32767.0f;
|
||||
|
||||
|
||||
const float BaseFreq = 440.0f;
|
||||
const Keys BaseKey = Key_A;
|
||||
@@ -361,7 +354,7 @@ public:
|
||||
return m_inputBufferFrames[ m_inputBufferRead ];
|
||||
}
|
||||
|
||||
inline const surroundSampleFrame * nextBuffer( void )
|
||||
inline surroundSampleFrame * nextBuffer( void )
|
||||
{
|
||||
return hasFifoWriter() ? m_fifo->read() : renderNextBuffer();
|
||||
}
|
||||
@@ -407,7 +400,7 @@ private:
|
||||
midiClient * tryMidiClients( void );
|
||||
|
||||
|
||||
const surroundSampleFrame * renderNextBuffer( void );
|
||||
surroundSampleFrame * renderNextBuffer( void );
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#cmakedefine LMMS_HAVE_VST
|
||||
|
||||
#cmakedefine LMMS_HAVE_STDINT_H
|
||||
#cmakedefine LMMS_HAVE_STDBOOL_H
|
||||
#cmakedefine LMMS_HAVE_STDLIB_H
|
||||
#cmakedefine LMMS_HAVE_PTHREAD_H
|
||||
#cmakedefine LMMS_HAVE_UNISTD_H
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include "ladspa_subplugin_features.h"
|
||||
#include "mixer.h"
|
||||
#include "effect_chain.h"
|
||||
#include "basic_ops.h"
|
||||
#include "automation_pattern.h"
|
||||
|
||||
|
||||
@@ -144,7 +145,7 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
|
||||
if( m_maxSampleRate < engine::getMixer()->processingSampleRate() )
|
||||
{
|
||||
o_buf = _buf;
|
||||
_buf = new sampleFrame[_frames];
|
||||
_buf = alignedAllocFrames( _frames );
|
||||
sampleDown( o_buf, _buf, m_maxSampleRate );
|
||||
frames = _frames * m_maxSampleRate /
|
||||
engine::getMixer()->processingSampleRate();
|
||||
@@ -217,8 +218,8 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
|
||||
// Copy the LADSPA output buffers to the LMMS buffer.
|
||||
double out_sum = 0.0;
|
||||
channel = 0;
|
||||
const float d = getDryLevel();
|
||||
const float w = getWetLevel();
|
||||
float * buffers[2];
|
||||
|
||||
for( ch_cnt_t proc = 0; proc < getProcessorCount(); ++proc )
|
||||
{
|
||||
for( int port = 0; port < m_portCount; ++port )
|
||||
@@ -231,17 +232,9 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
|
||||
case CONTROL_RATE_INPUT:
|
||||
break;
|
||||
case CHANNEL_OUT:
|
||||
for( fpp_t frame = 0;
|
||||
frame < frames; ++frame )
|
||||
if( channel < DEFAULT_CHANNELS )
|
||||
{
|
||||
_buf[frame][channel] =
|
||||
d *
|
||||
_buf[frame][channel] +
|
||||
w *
|
||||
pp->buffer[frame];
|
||||
out_sum +=
|
||||
_buf[frame][channel] *
|
||||
_buf[frame][channel];
|
||||
buffers[channel] = pp->buffer;
|
||||
}
|
||||
++channel;
|
||||
break;
|
||||
@@ -254,10 +247,27 @@ bool ladspaEffect::processAudioBuffer( sampleFrame * _buf,
|
||||
}
|
||||
}
|
||||
|
||||
if( channel == 1 )
|
||||
{
|
||||
buffers[1] = buffers[0];
|
||||
}
|
||||
if( channel >= 1 && channel <= DEFAULT_CHANNELS )
|
||||
{
|
||||
alignedBufWetDryMixSplitted( _buf, buffers[0], buffers[1],
|
||||
getWetLevel(), getDryLevel(), frames );
|
||||
}
|
||||
|
||||
for( int i = 0; i < frames; ++i )
|
||||
{
|
||||
out_sum += _buf[i][0]*_buf[i][0];
|
||||
out_sum += _buf[i][1]*_buf[i][1];
|
||||
|
||||
}
|
||||
|
||||
if( o_buf != NULL )
|
||||
{
|
||||
sampleBack( _buf, o_buf, m_maxSampleRate );
|
||||
delete[] _buf;
|
||||
alignedFreeFrames( _buf );
|
||||
}
|
||||
|
||||
checkGate( out_sum / frames );
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
#include "lcd_spinbox.h"
|
||||
#include "gui_templates.h"
|
||||
#include "templates.h"
|
||||
#include "basic_ops.h"
|
||||
|
||||
|
||||
|
||||
@@ -229,13 +230,15 @@ void audioALSA::applyQualitySettings( void )
|
||||
|
||||
void audioALSA::run( void )
|
||||
{
|
||||
surroundSampleFrame * temp =
|
||||
new surroundSampleFrame[getMixer()->framesPerPeriod()];
|
||||
int_sample_t * outbuf =
|
||||
new int_sample_t[getMixer()->framesPerPeriod() *
|
||||
channels()];
|
||||
sampleFrameA * temp = alignedAllocFrames(
|
||||
getMixer()->framesPerPeriod() );
|
||||
intSampleFrameA * outbuf = (intSampleFrameA *)
|
||||
alignedMalloc( sizeof( intSampleFrameA ) * channels() /
|
||||
DEFAULT_CHANNELS * getMixer()->framesPerPeriod() );
|
||||
|
||||
int_sample_t * pcmbuf = new int_sample_t[m_periodSize * channels()];
|
||||
|
||||
|
||||
int outbuf_size = getMixer()->framesPerPeriod() * channels();
|
||||
int outbuf_pos = 0;
|
||||
int pcmbuf_size = m_periodSize * channels();
|
||||
@@ -254,16 +257,15 @@ void audioALSA::run( void )
|
||||
if( !frames )
|
||||
{
|
||||
quit = TRUE;
|
||||
memset( ptr, 0, len
|
||||
alignedMemClear( ptr, len
|
||||
* sizeof( int_sample_t ) );
|
||||
break;
|
||||
}
|
||||
outbuf_size = frames * channels();
|
||||
|
||||
convertToS16( temp, frames,
|
||||
alignedConvertToS16( temp, outbuf, frames,
|
||||
getMixer()->masterGain(),
|
||||
outbuf,
|
||||
m_convertEndian );
|
||||
m_convertEndian );
|
||||
}
|
||||
int min_len = qMin( len, outbuf_size - outbuf_pos );
|
||||
memcpy( ptr, outbuf + outbuf_pos,
|
||||
@@ -300,8 +302,8 @@ void audioALSA::run( void )
|
||||
}
|
||||
}
|
||||
|
||||
delete[] temp;
|
||||
delete[] outbuf;
|
||||
alignedFreeFrames( temp );
|
||||
alignedFree( outbuf );
|
||||
delete[] pcmbuf;
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@
|
||||
#include "audio_device.h"
|
||||
#include "config_mgr.h"
|
||||
#include "debug.h"
|
||||
#include "basic_ops.h"
|
||||
|
||||
|
||||
|
||||
@@ -39,7 +40,7 @@ audioDevice::audioDevice( const ch_cnt_t _channels, mixer * _mixer ) :
|
||||
m_sampleRate( _mixer->processingSampleRate() ),
|
||||
m_channels( _channels ),
|
||||
m_mixer( _mixer ),
|
||||
m_buffer( new surroundSampleFrame[getMixer()->framesPerPeriod()] )
|
||||
m_buffer( alignedAllocFrames( getMixer()->framesPerPeriod() ) )
|
||||
{
|
||||
int error;
|
||||
if( ( m_srcState = src_new(
|
||||
@@ -56,7 +57,7 @@ audioDevice::audioDevice( const ch_cnt_t _channels, mixer * _mixer ) :
|
||||
audioDevice::~audioDevice()
|
||||
{
|
||||
src_delete( m_srcState );
|
||||
delete[] m_buffer;
|
||||
alignedFreeFrames( m_buffer );
|
||||
|
||||
m_devMutex.tryLock();
|
||||
unlock();
|
||||
@@ -81,10 +82,10 @@ void audioDevice::processNextBuffer( void )
|
||||
|
||||
|
||||
|
||||
fpp_t audioDevice::getNextBuffer( surroundSampleFrame * _ab )
|
||||
fpp_t audioDevice::getNextBuffer( sampleFrameA * _ab )
|
||||
{
|
||||
fpp_t frames = getMixer()->framesPerPeriod();
|
||||
const surroundSampleFrame * b = getMixer()->nextBuffer();
|
||||
sampleFrameA * b = getMixer()->nextBuffer();
|
||||
if( !b )
|
||||
{
|
||||
return( 0 );
|
||||
@@ -103,7 +104,7 @@ fpp_t audioDevice::getNextBuffer( surroundSampleFrame * _ab )
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( _ab, b, frames * sizeof( surroundSampleFrame ) );
|
||||
alignedMemCpy( _ab, b, frames * sizeof( surroundSampleFrame ) );
|
||||
}
|
||||
|
||||
// release lock
|
||||
@@ -111,10 +112,10 @@ fpp_t audioDevice::getNextBuffer( surroundSampleFrame * _ab )
|
||||
|
||||
if( getMixer()->hasFifoWriter() )
|
||||
{
|
||||
delete[] b;
|
||||
alignedFreeFrames( b );
|
||||
}
|
||||
|
||||
return( frames );
|
||||
return frames;
|
||||
}
|
||||
|
||||
|
||||
@@ -171,11 +172,10 @@ void audioDevice::renamePort( audioPort * )
|
||||
|
||||
|
||||
|
||||
void audioDevice::resample( const surroundSampleFrame * _src,
|
||||
const fpp_t _frames,
|
||||
surroundSampleFrame * _dst,
|
||||
const sample_rate_t _src_sr,
|
||||
const sample_rate_t _dst_sr )
|
||||
void audioDevice::resample( const sampleFrame * _src, const fpp_t _frames,
|
||||
sampleFrame * _dst,
|
||||
const sample_rate_t _src_sr,
|
||||
const sample_rate_t _dst_sr )
|
||||
{
|
||||
if( m_srcState == NULL )
|
||||
{
|
||||
@@ -197,57 +197,11 @@ void audioDevice::resample( const surroundSampleFrame * _src,
|
||||
|
||||
|
||||
|
||||
Uint32 audioDevice::convertToS16( const surroundSampleFrame * _ab,
|
||||
const fpp_t _frames,
|
||||
const float _master_gain,
|
||||
int_sample_t * _output_buffer,
|
||||
const bool _convert_endian )
|
||||
|
||||
void audioDevice::clearS16Buffer( intSampleFrameA * _outbuf, const fpp_t _frames )
|
||||
{
|
||||
if( _convert_endian )
|
||||
{
|
||||
Uint16 temp;
|
||||
for( fpp_t frame = 0; frame < _frames; ++frame )
|
||||
{
|
||||
for( ch_cnt_t chnl = 0; chnl < channels(); ++chnl )
|
||||
{
|
||||
temp = static_cast<int_sample_t>(
|
||||
mixer::clip( _ab[frame][chnl] *
|
||||
_master_gain ) *
|
||||
OUTPUT_SAMPLE_MULTIPLIER );
|
||||
|
||||
( _output_buffer + frame * channels() )[chnl] =
|
||||
( temp & 0x00ff ) << 8 |
|
||||
( temp & 0xff00 ) >> 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for( fpp_t frame = 0; frame < _frames; ++frame )
|
||||
{
|
||||
for( ch_cnt_t chnl = 0; chnl < channels(); ++chnl )
|
||||
{
|
||||
( _output_buffer + frame * channels() )[chnl] =
|
||||
static_cast<int_sample_t>(
|
||||
mixer::clip( _ab[frame][chnl] *
|
||||
_master_gain ) *
|
||||
OUTPUT_SAMPLE_MULTIPLIER );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return( _frames * channels() * BYTES_PER_INT_SAMPLE );
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void audioDevice::clearS16Buffer( int_sample_t * _outbuf, const fpp_t _frames )
|
||||
{
|
||||
#ifdef LMMS_DEBUG
|
||||
assert( _outbuf != NULL );
|
||||
#endif
|
||||
memset( _outbuf, 0, _frames * channels() * BYTES_PER_INT_SAMPLE );
|
||||
alignedMemClear( _outbuf, _frames * sizeof( *_outbuf ) );
|
||||
// memset( _outbuf, 0, _frames * channels() * BYTES_PER_INT_SAMPLE );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
|
||||
#include "audio_file_wave.h"
|
||||
#include "endian_handling.h"
|
||||
#include "basic_ops.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
@@ -101,12 +102,14 @@ void audioFileWave::writeBuffer( const surroundSampleFrame * _ab,
|
||||
}
|
||||
else
|
||||
{
|
||||
int_sample_t * buf = new int_sample_t[_frames * channels()];
|
||||
convertToS16( _ab, _frames, _master_gain, buf,
|
||||
intSampleFrameA * buf = (intSampleFrameA *)
|
||||
alignedMalloc(
|
||||
sizeof( intSampleFrameA ) * _frames );
|
||||
alignedConvertToS16( _ab, buf, _frames, _master_gain,
|
||||
!isLittleEndian() );
|
||||
|
||||
sf_writef_short( m_sf, buf, _frames );
|
||||
delete[] buf;
|
||||
sf_writef_short( m_sf, (int_sample_t *) buf, _frames );
|
||||
alignedFree( buf );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -45,6 +45,7 @@
|
||||
#include "config_mgr.h"
|
||||
#include "lcd_spinbox.h"
|
||||
#include "audio_port.h"
|
||||
#include "basic_ops.h"
|
||||
|
||||
|
||||
|
||||
@@ -57,7 +58,7 @@ audioJACK::audioJACK( bool & _success_ful, mixer * _mixer ) :
|
||||
m_client( NULL ),
|
||||
m_active( FALSE ),
|
||||
m_stop_semaphore( 1 ),
|
||||
m_outBuf( new surroundSampleFrame[getMixer()->framesPerPeriod()] ),
|
||||
m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ),
|
||||
m_framesDoneInCurBuf( 0 ),
|
||||
m_framesToDoInCurBuf( 0 )
|
||||
{
|
||||
@@ -159,7 +160,7 @@ audioJACK::~audioJACK()
|
||||
jack_client_close( m_client );
|
||||
}
|
||||
|
||||
delete[] m_outBuf;
|
||||
alignedFreeFrames( m_outBuf );
|
||||
|
||||
}
|
||||
|
||||
@@ -367,14 +368,14 @@ int audioJACK::processCallback( jack_nframes_t _nframes, void * _udata )
|
||||
_this->m_framesDoneInCurBuf );
|
||||
if( ts == JackTransportRolling )
|
||||
{
|
||||
const float gain = _this->getMixer()->masterGain();
|
||||
for( Uint8 chnl = 0; chnl < _this->channels(); ++chnl )
|
||||
{
|
||||
for( jack_nframes_t frame = 0; frame < todo;
|
||||
++frame )
|
||||
{
|
||||
outbufs[chnl][done+frame] =
|
||||
_this->m_outBuf[_this->m_framesDoneInCurBuf+frame][chnl] *
|
||||
_this->getMixer()->masterGain();
|
||||
_this->m_outBuf[_this->m_framesDoneInCurBuf+frame][chnl] * gain;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
#include "engine.h"
|
||||
#include "gui_templates.h"
|
||||
#include "templates.h"
|
||||
#include "basic_ops.h"
|
||||
|
||||
#ifdef LMMS_HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
@@ -298,13 +299,13 @@ void audioOSS::applyQualitySettings( void )
|
||||
|
||||
void audioOSS::run( void )
|
||||
{
|
||||
surroundSampleFrame * temp =
|
||||
new surroundSampleFrame[getMixer()->framesPerPeriod()];
|
||||
int_sample_t * outbuf =
|
||||
new int_sample_t[getMixer()->framesPerPeriod() *
|
||||
channels()];
|
||||
sampleFrameA * temp = alignedAllocFrames(
|
||||
getMixer()->framesPerPeriod() );
|
||||
intSampleFrameA * outbuf = (intSampleFrameA *)
|
||||
alignedMalloc( sizeof( intSampleFrameA ) *
|
||||
getMixer()->framesPerPeriod() );
|
||||
|
||||
while( TRUE )
|
||||
while( 1 )
|
||||
{
|
||||
const fpp_t frames = getNextBuffer( temp );
|
||||
if( !frames )
|
||||
@@ -312,8 +313,8 @@ void audioOSS::run( void )
|
||||
break;
|
||||
}
|
||||
|
||||
int bytes = convertToS16( temp, frames,
|
||||
getMixer()->masterGain(), outbuf,
|
||||
int bytes = alignedConvertToS16( temp, outbuf, frames,
|
||||
getMixer()->masterGain(),
|
||||
m_convertEndian );
|
||||
if( write( m_audioFD, outbuf, bytes ) != bytes )
|
||||
{
|
||||
@@ -321,8 +322,8 @@ void audioOSS::run( void )
|
||||
}
|
||||
}
|
||||
|
||||
delete[] temp;
|
||||
delete[] outbuf;
|
||||
alignedFreeFrames( temp );
|
||||
alignedFree( outbuf );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -26,13 +26,15 @@
|
||||
#include "audio_device.h"
|
||||
#include "effect_chain.h"
|
||||
#include "engine.h"
|
||||
#include "basic_ops.h"
|
||||
|
||||
|
||||
audioPort::audioPort( const QString & _name, bool _has_effect_chain ) :
|
||||
m_bufferUsage( NoUsage ),
|
||||
m_firstBuffer( new sampleFrame[engine::getMixer()->framesPerPeriod()] ),
|
||||
m_secondBuffer( new sampleFrame[
|
||||
engine::getMixer()->framesPerPeriod()] ),
|
||||
m_firstBuffer( alignedAllocFrames(
|
||||
engine::getMixer()->framesPerPeriod() ) ),
|
||||
m_secondBuffer( alignedAllocFrames(
|
||||
engine::getMixer()->framesPerPeriod() ) ),
|
||||
m_extOutputEnabled( false ),
|
||||
m_nextFxChannel( 0 ),
|
||||
m_name( "unnamed port" ),
|
||||
@@ -53,8 +55,8 @@ audioPort::~audioPort()
|
||||
{
|
||||
setExtOutputEnabled( false );
|
||||
engine::getMixer()->removeAudioPort( this );
|
||||
delete[] m_firstBuffer;
|
||||
delete[] m_secondBuffer;
|
||||
alignedFreeFrames( m_firstBuffer );
|
||||
alignedFreeFrames( m_secondBuffer );
|
||||
delete m_effects;
|
||||
}
|
||||
|
||||
|
||||
@@ -55,11 +55,12 @@ void audioPortAudioSetupUtil::updateChannels( void )
|
||||
|
||||
audioPortAudio::audioPortAudio( bool & _success_ful, mixer * _mixer ) :
|
||||
audioDevice( tLimit<ch_cnt_t>(
|
||||
configManager::inst()->value( "audioportaudio", "channels" ).toInt(),
|
||||
configManager::inst()->value( "audioportaudio",
|
||||
"channels" ).toInt(),
|
||||
DEFAULT_CHANNELS, SURROUND_CHANNELS ),
|
||||
_mixer ),
|
||||
m_wasPAInitError( false ),
|
||||
m_outBuf( new surroundSampleFrame[getMixer()->framesPerPeriod()] ),
|
||||
m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ),
|
||||
m_outBufPos( 0 ),
|
||||
m_stopSemaphore( 1 )
|
||||
{
|
||||
@@ -205,7 +206,7 @@ audioPortAudio::~audioPortAudio()
|
||||
{
|
||||
Pa_Terminate();
|
||||
}
|
||||
delete[] m_outBuf;
|
||||
alignedFreeFrames( m_outBuf );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
#include "lcd_spinbox.h"
|
||||
#include "gui_templates.h"
|
||||
#include "templates.h"
|
||||
#include "basic_ops.h"
|
||||
|
||||
|
||||
static void stream_write_callback(pa_stream *s, size_t length, void *userdata)
|
||||
@@ -230,8 +231,9 @@ void audioPulseAudio::run( void )
|
||||
void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length)
|
||||
{
|
||||
const fpp_t fpp = getMixer()->framesPerPeriod();
|
||||
surroundSampleFrame * temp = new surroundSampleFrame[fpp];
|
||||
Sint16 * pcmbuf = (Sint16*)pa_xmalloc( fpp * channels() * sizeof(Sint16) );
|
||||
sampleFrameA * temp = alignedAllocFrames( fpp );
|
||||
Sint16 * pcmbuf = (Sint16*)pa_xmalloc( fpp * channels() *
|
||||
sizeof(Sint16) );
|
||||
|
||||
size_t fd = 0;
|
||||
while( fd < length/4 )
|
||||
@@ -241,9 +243,10 @@ void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length)
|
||||
{
|
||||
return;
|
||||
}
|
||||
int bytes = convertToS16( temp, frames,
|
||||
int bytes = alignedConvertToS16( temp,
|
||||
(intSampleFrameA *) pcmbuf,
|
||||
frames,
|
||||
getMixer()->masterGain(),
|
||||
pcmbuf,
|
||||
m_convertEndian );
|
||||
if( bytes > 0 )
|
||||
{
|
||||
@@ -254,7 +257,7 @@ void audioPulseAudio::streamWriteCallback(pa_stream *s, size_t length)
|
||||
}
|
||||
|
||||
pa_xfree( pcmbuf );
|
||||
delete[] temp;
|
||||
alignedFreeFrames( temp );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -38,22 +38,22 @@
|
||||
#include "config_mgr.h"
|
||||
#include "gui_templates.h"
|
||||
#include "templates.h"
|
||||
|
||||
#include "basic_ops.h"
|
||||
|
||||
|
||||
|
||||
audioSDL::audioSDL( bool & _success_ful, mixer * _mixer ) :
|
||||
audioDevice( DEFAULT_CHANNELS, _mixer ),
|
||||
m_outBuf( new surroundSampleFrame[getMixer()->framesPerPeriod()] ),
|
||||
m_outBuf( alignedAllocFrames( getMixer()->framesPerPeriod() ) ),
|
||||
m_convertedBufPos( 0 ),
|
||||
m_convertEndian( false ),
|
||||
m_stopSemaphore( 1 )
|
||||
{
|
||||
_success_ful = FALSE;
|
||||
|
||||
m_convertedBufSize = getMixer()->framesPerPeriod() * channels()
|
||||
* sizeof( int_sample_t );
|
||||
m_convertedBuf = new Uint8[m_convertedBufSize];
|
||||
m_convertedBufSize = getMixer()->framesPerPeriod() *
|
||||
sizeof( intSampleFrameA );
|
||||
m_convertedBuf = (intSampleFrameA *) alignedMalloc( m_convertedBufSize );
|
||||
|
||||
|
||||
if( SDL_Init( SDL_INIT_AUDIO | SDL_INIT_NOPARACHUTE ) < 0 )
|
||||
@@ -97,8 +97,8 @@ audioSDL::~audioSDL()
|
||||
|
||||
SDL_CloseAudio();
|
||||
SDL_Quit();
|
||||
delete[] m_convertedBuf;
|
||||
delete[] m_outBuf;
|
||||
alignedFree( m_convertedBuf );
|
||||
alignedFreeFrames( m_outBuf );
|
||||
}
|
||||
|
||||
|
||||
@@ -190,12 +190,12 @@ void audioSDL::sdlAudioCallback( Uint8 * _buf, int _len )
|
||||
memset( _buf, 0, _len );
|
||||
return;
|
||||
}
|
||||
m_convertedBufSize = frames * channels()
|
||||
* sizeof( int_sample_t );
|
||||
m_convertedBufSize = frames * sizeof( intSampleFrameA );
|
||||
|
||||
convertToS16( m_outBuf, frames,
|
||||
alignedConvertToS16( m_outBuf,
|
||||
m_convertedBuf,
|
||||
frames,
|
||||
getMixer()->masterGain(),
|
||||
(int_sample_t *)m_convertedBuf,
|
||||
m_convertEndian );
|
||||
}
|
||||
const int min_len = qMin( _len, m_convertedBufSize
|
||||
|
||||
455
src/core/basic_ops.cpp
Normal file
455
src/core/basic_ops.cpp
Normal file
@@ -0,0 +1,455 @@
|
||||
/*
|
||||
* basic_ops.cpp - basic memory operations
|
||||
*
|
||||
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program (see COPYING); if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
* Boston, MA 02110-1301 USA.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "basic_ops.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <memory.h>
|
||||
|
||||
|
||||
|
||||
void * alignedMalloc( int _bytes )
|
||||
{
|
||||
char *ptr,*ptr2,*aligned_ptr;
|
||||
int align_mask = ALIGN_SIZE- 1;
|
||||
ptr =(char *) malloc( _bytes + ALIGN_SIZE + sizeof(int) );
|
||||
if( ptr == NULL )
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ptr2 = ptr + sizeof(int);
|
||||
aligned_ptr = ptr2 + ( ALIGN_SIZE- ( (size_t) ptr2 & align_mask ) );
|
||||
|
||||
|
||||
ptr2 = aligned_ptr - sizeof(int);
|
||||
*((int *) ptr2) = (int)( aligned_ptr - ptr );
|
||||
|
||||
return aligned_ptr;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Releases a buffer obtained from alignedMalloc(). Passing NULL is a no-op.
 *
 * The int stored directly in front of _buf holds the distance back to the
 * pointer originally returned by malloc(); that pointer is what gets freed.
 */
void alignedFree( void * _buf )
{
	if( _buf == NULL )
	{
		return;
	}

	const int offset = *( (int *) _buf - 1 );
	void * const raw = (char *) _buf - offset;
	if( raw != NULL )
	{
		free( raw );
	}
}
|
||||
|
||||
|
||||
/*
 * Allocates an aligned buffer with room for _n sampleFrameA elements via
 * alignedMalloc(). Returns NULL if the allocation fails.
 */
sampleFrameA * alignedAllocFrames( int _n )
{
	void * const mem = alignedMalloc( _n * sizeof( sampleFrameA ) );
	return (sampleFrameA *) mem;
}
|
||||
|
||||
|
||||
/*
 * Releases a frame buffer by forwarding it to alignedFree().
 */
void alignedFreeFrames( sampleFrame * _buf )
{
	alignedFree( (void *) _buf );
}
|
||||
|
||||
|
||||
|
||||
|
||||
// slow fallback
|
||||
void alignedMemCpyNoOpt( void * RP _dst, const void * RP _src, int _size )
|
||||
{
|
||||
const int s = _size / ( sizeof( int ) * 16 );
|
||||
const int * RP src = (const int *) _src;
|
||||
int * RP dst = (int *) _dst;
|
||||
for( int i = 0; i < s; )
|
||||
{
|
||||
dst[i+0] = src[i+0];
|
||||
dst[i+1] = src[i+1];
|
||||
dst[i+2] = src[i+2];
|
||||
dst[i+3] = src[i+3];
|
||||
dst[i+4] = src[i+4];
|
||||
dst[i+5] = src[i+5];
|
||||
dst[i+6] = src[i+6];
|
||||
dst[i+7] = src[i+7];
|
||||
dst[i+8] = src[i+8];
|
||||
dst[i+9] = src[i+9];
|
||||
dst[i+10] = src[i+10];
|
||||
dst[i+11] = src[i+11];
|
||||
dst[i+12] = src[i+12];
|
||||
dst[i+13] = src[i+13];
|
||||
dst[i+14] = src[i+14];
|
||||
dst[i+15] = src[i+15];
|
||||
i += 16;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// slow fallback
/*
 * Zeroes _size bytes at _dst in groups of four ints. A trailing partial
 * group (fewer than 4 * sizeof( int ) bytes) is left untouched.
 */
void alignedMemClearNoOpt( void * _dst, int _size )
{
	int * out = (int *) _dst;
	int groups_left = _size / ( sizeof( int ) * 4 );
	while( groups_left > 0 )
	{
		out[0] = 0;
		out[1] = 0;
		out[2] = 0;
		out[3] = 0;
		out += 4;
		--groups_left;
	}
}
|
||||
|
||||
|
||||
|
||||
/*
 * Multiplies both channels of the first _frames frames of _dst by _gain.
 *
 * Frames are processed eight at a time; frames left over when _frames is
 * not a multiple of eight are handled one by one, so nothing beyond
 * _dst[_frames-1] is ever touched.
 */
void alignedBufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain,
								int _frames )
{
	int i = 0;

	// bulk part: eight frames per iteration
	for( ; i + 8 <= _frames; i += 8 )
	{
		_dst[i+0][0] *= _gain;
		_dst[i+0][1] *= _gain;
		_dst[i+1][0] *= _gain;
		_dst[i+1][1] *= _gain;
		_dst[i+2][0] *= _gain;
		_dst[i+2][1] *= _gain;
		_dst[i+3][0] *= _gain;
		_dst[i+3][1] *= _gain;
		_dst[i+4][0] *= _gain;
		_dst[i+4][1] *= _gain;
		_dst[i+5][0] *= _gain;
		_dst[i+5][1] *= _gain;
		_dst[i+6][0] *= _gain;
		_dst[i+6][1] *= _gain;
		_dst[i+7][0] *= _gain;
		_dst[i+7][1] *= _gain;
	}

	// remainder: fewer than eight frames left
	for( ; i < _frames; ++i )
	{
		_dst[i][0] *= _gain;
		_dst[i][1] *= _gain;
	}
}
|
||||
|
||||
|
||||
/*
 * Adds the first _frames frames of _src onto _dst, channel by channel.
 *
 * Frames are processed four at a time; frames left over when _frames is
 * not a multiple of four are handled one by one, so neither buffer is
 * accessed beyond index _frames-1.
 */
void alignedBufMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
								int _frames )
{
	int i = 0;

	// bulk part: four frames per iteration
	for( ; i + 4 <= _frames; i += 4 )
	{
		_dst[i+0][0] += _src[i+0][0];
		_dst[i+0][1] += _src[i+0][1];
		_dst[i+1][0] += _src[i+1][0];
		_dst[i+1][1] += _src[i+1][1];
		_dst[i+2][0] += _src[i+2][0];
		_dst[i+2][1] += _src[i+2][1];
		_dst[i+3][0] += _src[i+3][0];
		_dst[i+3][1] += _src[i+3][1];
	}

	// remainder: fewer than four frames left
	for( ; i < _frames; ++i )
	{
		_dst[i][0] += _src[i][0];
		_dst[i][1] += _src[i][1];
	}
}
|
||||
|
||||
|
||||
|
||||
/*
 * Adds _src onto _dst for the first _frames frames, scaling channel 0 by
 * _left and channel 1 by _right.
 *
 * Frames are processed four at a time; frames left over when _frames is
 * not a multiple of four are handled one by one, so neither buffer is
 * accessed beyond index _frames-1.
 */
void alignedBufMixLRCoeffNoOpt( sampleFrameA * RP _dst,
					const sampleFrameA * RP _src,
					float _left, float _right, int _frames )
{
	int i = 0;

	// bulk part: four frames per iteration
	for( ; i + 4 <= _frames; i += 4 )
	{
		_dst[i+0][0] += _src[i+0][0]*_left;
		_dst[i+0][1] += _src[i+0][1]*_right;
		_dst[i+1][0] += _src[i+1][0]*_left;
		_dst[i+1][1] += _src[i+1][1]*_right;
		_dst[i+2][0] += _src[i+2][0]*_left;
		_dst[i+2][1] += _src[i+2][1]*_right;
		_dst[i+3][0] += _src[i+3][0]*_left;
		_dst[i+3][1] += _src[i+3][1]*_right;
	}

	// remainder: fewer than four frames left
	for( ; i < _frames; ++i )
	{
		_dst[i][0] += _src[i][0]*_left;
		_dst[i][1] += _src[i][1]*_right;
	}
}
|
||||
|
||||
|
||||
|
||||
/*
 * Adds _src onto _dst for _frames frames, scaling channel 0 by _left and
 * channel 1 by _right. Any frame count is accepted: an odd count is reduced
 * to an even one by handling a single frame up front, the rest is processed
 * in pairs.
 */
void unalignedBufMixLRCoeffNoOpt( sampleFrame * RP _dst,
					const sampleFrame * RP _src,
					const float _left,
					const float _right,
					int _frames )
{
	// peel off one frame so that an even number remains
	if( ( _frames % 2 ) != 0 )
	{
		_dst[0][0] += _src[0][0] * _left;
		_dst[0][1] += _src[0][1] * _right;
		++_src;
		++_dst;
		--_frames;
	}

	// two frames per iteration
	for( int pairs = _frames / 2; pairs > 0; --pairs )
	{
		_dst[0][0] += _src[0][0]*_left;
		_dst[0][1] += _src[0][1]*_right;
		_dst[1][0] += _src[1][0]*_left;
		_dst[1][1] += _src[1][1]*_right;
		_dst += 2;
		_src += 2;
	}
}
|
||||
|
||||
|
||||
|
||||
/*
 * Blends _src into _dst for _frames frames:
 * dst = dst * _dry + src * _wet, applied to both channels of every frame.
 */
void alignedBufWetDryMixNoOpt( sampleFrameA * RP _dst,
					const sampleFrameA * RP _src,
					float _wet, float _dry, int _frames )
{
	int frame = 0;
	while( frame < _frames )
	{
		_dst[frame][0] = _dst[frame][0]*_dry + _src[frame][0]*_wet;
		_dst[frame][1] = _dst[frame][1]*_dry + _src[frame][1]*_wet;
		++frame;
	}
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Blends two separate channel buffers into the interleaved buffer _dst for
 * _frames frames:
 *   dst[i][0] = dst[i][0] * _dry + _left[i]  * _wet
 *   dst[i][1] = dst[i][1] * _dry + _right[i] * _wet
 */
void alignedBufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst,
						const float * RP _left,
						const float * RP _right,
					float _wet, float _dry, int _frames )
{
	int i;
	// one frame per iteration; the index is advanced by the loop header only
	// (an additional increment in the body used to skip every second frame)
	for( i = 0; i < _frames; ++i )
	{
		_dst[i][0] = _dst[i][0]*_dry + _left[i]*_wet;
		_dst[i][1] = _dst[i][1]*_dry + _right[i]*_wet;
	}
}
|
||||
|
||||
|
||||
|
||||
|
||||
int alignedConvertToS16NoOpt( const sampleFrameA * RP _src,
|
||||
intSampleFrameA * RP _dst,
|
||||
const fpp_t _frames,
|
||||
const float _master_gain,
|
||||
const bool _convert_endian )
|
||||
{
|
||||
int t1;
|
||||
int t2;
|
||||
const float f = _master_gain * OUTPUT_SAMPLE_MULTIPLIER;
|
||||
if( _convert_endian )
|
||||
{
|
||||
for( fpp_t frame = 0; frame < _frames; ++frame )
|
||||
{
|
||||
t1 = _src[frame][0] * f;
|
||||
t1 = unlikely( t1 > 32767 ) ? 32767 : t1;
|
||||
t1 = unlikely( t1 < -32768 ) ? -32768 : t1;
|
||||
_dst[frame][0] = ( t1 & 0x00ff) << 8 |
|
||||
( t1 & 0xff00 ) >> 8;
|
||||
|
||||
t2 = _src[frame][1] * f;
|
||||
t2 = unlikely( t2 > 32767 ) ? 32767 : t2;
|
||||
t2 = unlikely( t2 < -32768 ) ? -32768 : t2;
|
||||
_dst[frame][1] = ( t2 & 0x00ff) << 8 |
|
||||
( t2 & 0xff00 ) >> 8;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for( fpp_t frame = 0; frame < _frames; ++frame )
|
||||
{
|
||||
t1 = _src[frame][0] * f;
|
||||
t1 = unlikely( t1 > 32767 ) ? 32767 : t1;
|
||||
t1 = unlikely( t1 < -32768 ) ? -32768 : t1;
|
||||
_dst[frame][0] = t1;
|
||||
|
||||
t2 = _src[frame][1] * f;
|
||||
t2 = unlikely( t2 > 32767 ) ? 32767 : t2;
|
||||
t2 = unlikely( t2 < -32768 ) ? -32768 : t2;
|
||||
_dst[frame][1] = t2;
|
||||
}
|
||||
}
|
||||
|
||||
return _frames * DEFAULT_CHANNELS * BYTES_PER_INT_SAMPLE;
|
||||
}
|
||||
|
||||
|
||||
alignedMemCpyFunc alignedMemCpy = alignedMemCpyNoOpt;
|
||||
alignedMemClearFunc alignedMemClear = alignedMemClearNoOpt;
|
||||
alignedBufApplyGainFunc alignedBufApplyGain = alignedBufApplyGainNoOpt;
|
||||
alignedBufMixFunc alignedBufMix = alignedBufMixNoOpt;
|
||||
alignedBufMixLRCoeffFunc alignedBufMixLRCoeff = alignedBufMixLRCoeffNoOpt;
|
||||
unalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff = unalignedBufMixLRCoeffNoOpt;
|
||||
alignedBufWetDryMixFunc alignedBufWetDryMix = alignedBufWetDryMixNoOpt;
|
||||
alignedBufWetDryMixSplittedFunc alignedBufWetDryMixSplitted = alignedBufWetDryMixSplittedNoOpt;
|
||||
alignedConvertToS16Func alignedConvertToS16 = alignedConvertToS16NoOpt;
|
||||
|
||||
|
||||
#ifdef X86_OPTIMIZATIONS
// Bit flags describing the instruction set extensions found by
// initBasicOps(); several of them are or-ed together in one int.
enum CPUFeatures
{
	None		= 0,
	MMX		= 0x1,
	MMXEXT		= 0x2,
	MMX3DNOW	= 0x4,
	MMX3DNOWEXT	= 0x8,
	SSE		= 0x10,
	SSE2		= 0x20,
	CMOV		= 0x40,
	IWMMXT		= 0x80
};

// Optimized routines defined with C linkage in basic_ops_x86.c.
extern "C"
{
// MMX variants are declared for 32 bit x86 hosts only
#ifdef LMMS_HOST_X86
void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size );
void alignedMemClearMMX( void * RP _dst, int _size );
#endif

// SSE variants
void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size );
void alignedMemClearSSE( void * RP _dst, int _size );
void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames );
void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, int _frames );
void alignedBufMixLRCoeffSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _left, float _right, int _frames );
void unalignedBufMixLRCoeffSSE( sampleFrame * RP _dst, const sampleFrame * RP _src, const float _left, const float _right, int _frames );
void alignedBufWetDryMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _wet, float _dry, int _frames );
void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst, const float * RP _left, const float * RP _right, float _wet, float _dry, int _frames );

// SSE2 variants - declared whenever X86_OPTIMIZATIONS is set, because
// initBasicOps() refers to them without checking LMMS_HOST_X86
void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size );
void alignedMemClearSSE2( void * RP _dst, int _size );
int alignedConvertToS16SSE2( const sampleFrameA * RP _src, intSampleFrameA * RP _dst, const fpp_t _frames, const float _master_gain, const bool _convert_endian );
}
#endif
|
||||
|
||||
|
||||
|
||||
/*
 * Selects the implementations behind the basic-operation dispatch pointers.
 *
 * Without X86_OPTIMIZATIONS this function does nothing. Otherwise the CPU is
 * queried via CPUID on the first call and the pointers are redirected to the
 * MMX, SSE and SSE2 routines in that order, so the most capable variant
 * found wins. Subsequent calls return immediately.
 *
 * NOTE(review): both asm statements use 32 bit registers with push/pop -
 * confirm that X86_OPTIMIZATIONS is only defined where this assembles.
 */
void initBasicOps( void )
{
#ifdef X86_OPTIMIZATIONS
	static bool extensions_checked = false;
	if( extensions_checked )
	{
		return;
	}

	unsigned int result = 0;
	unsigned int extended_result = 0;

	// Standard feature flags: toggle bit 0x00200000 in EFLAGS to find out
	// whether CPUID can be used at all; if the bit does not stick, EDX stays
	// zero. Otherwise CPUID function 1 is executed and EDX is handed back.
	asm( "push %%ebx\n"
		"pushf\n"
		"pop %%eax\n"
		"mov %%eax, %%ebx\n"
		"xor $0x00200000, %%eax\n"
		"push %%eax\n"
		"popf\n"
		"pushf\n"
		"pop %%eax\n"
		"xor %%edx, %%edx\n"
		"xor %%ebx, %%eax\n"
		"jz 1f\n"

		"mov $0x00000001, %%eax\n"
		"cpuid\n"
		"1:\n"
		"pop %%ebx\n"
		"mov %%edx, %0\n"

		: "=r" (result)
		:
		: "%eax", "%ecx", "%edx"
		);

	// Extended feature flags: same availability test, then CPUID function
	// 0x80000000 tells whether function 0x80000001 exists before the latter
	// is executed and its EDX is handed back.
	asm( "push %%ebx\n"
		"pushf\n"
		"pop %%eax\n"
		"mov %%eax, %%ebx\n"
		"xor $0x00200000, %%eax\n"
		"push %%eax\n"
		"popf\n"
		"pushf\n"
		"pop %%eax\n"
		"xor %%edx, %%edx\n"
		"xor %%ebx, %%eax\n"
		"jz 2f\n"

		"mov $0x80000000, %%eax\n"
		"cpuid\n"
		"cmp $0x80000000, %%eax\n"
		"jbe 2f\n"
		"mov $0x80000001, %%eax\n"
		"cpuid\n"
		"2:\n"
		"pop %%ebx\n"
		"mov %%edx, %0\n"

		: "=r" (extended_result)
		:
		: "%eax", "%ecx", "%edx"
		);

	// translate the raw CPUID bits into CPUFeatures flags
	int features = None;
	features |= ( result & (1u << 15) ) ? CMOV : None;
	features |= ( result & (1u << 23) ) ? MMX : None;
	features |= ( extended_result & (1u << 22) ) ? MMXEXT : None;
	features |= ( extended_result & (1u << 31) ) ? MMX3DNOW : None;
	features |= ( extended_result & (1u << 30) ) ? MMX3DNOWEXT : None;
	features |= ( result & (1u << 25) ) ? SSE : None;
	features |= ( result & (1u << 26) ) ? SSE2 : None;

	// redirect the dispatch pointers; later blocks override earlier ones
#ifdef LMMS_HOST_X86
	if( features & MMX )
	{
		alignedMemCpy = alignedMemCpyMMX;
		alignedMemClear = alignedMemClearMMX;
	}
#endif
	if( features & SSE )
	{
		fprintf( stderr, "Using SSE optimized routines\n" );
		alignedMemCpy = alignedMemCpySSE;
		alignedMemClear = alignedMemClearSSE;
		alignedBufApplyGain = alignedBufApplyGainSSE;
		alignedBufMix = alignedBufMixSSE;
		alignedBufMixLRCoeff = alignedBufMixLRCoeffSSE;
		unalignedBufMixLRCoeff = unalignedBufMixLRCoeffSSE;
		alignedBufWetDryMix = alignedBufWetDryMixSSE;
		alignedBufWetDryMixSplitted =
					alignedBufWetDryMixSplittedSSE;
	}
	if( features & SSE2 )
	{
		fprintf( stderr, "Using SSE2 optimized routines\n" );
		alignedMemCpy = alignedMemCpySSE2;
		alignedMemClear = alignedMemClearSSE2;
		alignedConvertToS16 = alignedConvertToS16SSE2;
	}

	extensions_checked = true;
#endif
}
|
||||
|
||||
|
||||
|
||||
395
src/core/basic_ops_x86.c
Normal file
395
src/core/basic_ops_x86.c
Normal file
@@ -0,0 +1,395 @@
|
||||
/*
|
||||
* basic_ops_x86.c - x86 specific optimized operations
|
||||
*
|
||||
* Copyright (c) 2008 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
||||
*
|
||||
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program (see COPYING); if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
* Boston, MA 02110-1301 USA.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "basic_ops.h"
|
||||
|
||||
#ifdef X86_OPTIMIZATIONS
|
||||
|
||||
#ifdef BUILD_MMX
|
||||
|
||||
#include <mmintrin.h>
|
||||
|
||||
/*
 * Copies _size bytes from _src to _dst in 64 byte blocks using the MMX
 * registers. Bytes beyond the last complete 64 byte block are not copied.
 *
 * The MMX registers are shared with the x87 FPU, so the FPU state is saved
 * with fsave before the copy and put back with frstor afterwards.
 *
 * NOTE(review): prefetchnta is not part of the original MMX instruction set;
 * confirm that every CPU this routine is selected for accepts it.
 */
void alignedMemCpyMMX( void * RP _dst, const void * RP _src, int _size )
{
	const int s = _size / ( sizeof( __m64 ) * 8 );
	int i;
	char fpu_save[108];	/* save area used by fsave/frstor */
	char * RP src = (char *) _src;
	char * RP dst = (char *) _dst;

	/* save the FPU state before the MMX registers get overwritten */
	__asm__ __volatile__ ( " fsave %0; fwait\n"::"m"(fpu_save[0]) );

	/* prefetch the first part of the source */
	__asm__ __volatile__ (
		"1: prefetchnta (%0)\n"
		"   prefetchnta 64(%0)\n"
		"   prefetchnta 128(%0)\n"
		"   prefetchnta 192(%0)\n"
		"   prefetchnta 256(%0)\n"
		: : "r" (src) );

	for(i=0; i<s; i++)
	{
		/* move one 64 byte block through mm0-mm3, in two halves */
		__asm__ __volatile__ (
			"1: prefetchnta 320(%0)\n"
			"2: movq (%0), %%mm0\n"
			"   movq 8(%0), %%mm1\n"
			"   movq 16(%0), %%mm2\n"
			"   movq 24(%0), %%mm3\n"
			"   movq %%mm0, (%1)\n"
			"   movq %%mm1, 8(%1)\n"
			"   movq %%mm2, 16(%1)\n"
			"   movq %%mm3, 24(%1)\n"
			"   movq 32(%0), %%mm0\n"
			"   movq 40(%0), %%mm1\n"
			"   movq 48(%0), %%mm2\n"
			"   movq 56(%0), %%mm3\n"
			"   movq %%mm0, 32(%1)\n"
			"   movq %%mm1, 40(%1)\n"
			"   movq %%mm2, 48(%1)\n"
			"   movq %%mm3, 56(%1)\n"
			: : "r" (src), "r" (dst) : "memory");
		src+=64;
		dst+=64;
	}

	/* restore the FPU state saved above (this used to be a second fsave,
	 * which left the caller's FPU state lost) */
	__asm__ __volatile__ ( " frstor %0\n"::"m"(fpu_save[0]) );
}
|
||||
|
||||
|
||||
|
||||
/*
 * Zeroes _size bytes at _dst in 64 byte blocks using eight 8 byte MMX
 * stores per block. Bytes beyond the last complete block are left untouched.
 * The MMX state is cleared with _mm_empty() before returning.
 */
void alignedMemClearMMX( void * RP _dst, int _size )
{
	__m64 * out = (__m64 *) _dst;
	__m64 zero = _mm_setzero_si64();
	int blocks_left = _size / ( sizeof( __m64 ) * 8 );

	while( blocks_left > 0 )
	{
		__asm__ __volatile__ (
			"movq %0, (%1)\n"
			"movq %0, 8(%1)\n"
			"movq %0, 16(%1)\n"
			"movq %0, 24(%1)\n"
			"movq %0, 32(%1)\n"
			"movq %0, 40(%1)\n"
			"movq %0, 48(%1)\n"
			"movq %0, 56(%1)\n"
			: : "y" (zero), "r" (out) : "memory" );
		out += 8;
		--blocks_left;
	}

	_mm_empty();
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef BUILD_SSE
|
||||
|
||||
#include <xmmintrin.h>
|
||||
|
||||
/*
 * Copies _size bytes from _src to _dst in blocks of four __m128 values
 * (64 bytes). Bytes beyond the last complete block are not copied.
 */
void alignedMemCpySSE( void * RP _dst, const void * RP _src, int _size )
{
	__m128 * out = (__m128 *) _dst;
	__m128 * in = (__m128 *) _src;
	int blocks_left = _size / ( sizeof( __m128 ) * 4 );

	for( ; blocks_left > 0; --blocks_left )
	{
		/* plain assignments of __m128 values */
		out[0] = in[0];
		out[1] = in[1];
		out[2] = in[2];
		out[3] = in[3];
		in += 4;
		out += 4;
	}
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Zeroes _size bytes at _dst in blocks of four __m128 values (64 bytes).
 * Bytes beyond the last complete block are left untouched.
 */
void alignedMemClearSSE( void * RP _dst, int _size )
{
	__m128 * out = (__m128 *) _dst;
	const __m128 zero = _mm_setzero_ps();
	int blocks_left = _size / ( sizeof( __m128 ) * 4 );

	for( ; blocks_left > 0; --blocks_left )
	{
		out[0] = zero;
		out[1] = zero;
		out[2] = zero;
		out[3] = zero;
		out += 4;
	}
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Multiplies both channels of every frame in _dst by _gain.
 *
 * Eight frames are handled per iteration and there is no remainder
 * handling: if _frames is not a multiple of eight, the last iteration also
 * touches frames beyond _frames.
 */
void alignedBufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames )
{
	int f;
	for( f = 0; f < _frames; f += 8 )
	{
		_dst[f+0][0] *= _gain;
		_dst[f+0][1] *= _gain;
		_dst[f+1][0] *= _gain;
		_dst[f+1][1] *= _gain;
		_dst[f+2][0] *= _gain;
		_dst[f+2][1] *= _gain;
		_dst[f+3][0] *= _gain;
		_dst[f+3][1] *= _gain;
		_dst[f+4][0] *= _gain;
		_dst[f+4][1] *= _gain;
		_dst[f+5][0] *= _gain;
		_dst[f+5][1] *= _gain;
		_dst[f+6][0] *= _gain;
		_dst[f+6][1] *= _gain;
		_dst[f+7][0] *= _gain;
		_dst[f+7][1] *= _gain;
	}
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Adds _src onto _dst, channel by channel.
 *
 * Eight frames are handled per iteration and there is no remainder
 * handling: if _frames is not a multiple of eight, the last iteration also
 * accesses frames beyond _frames in both buffers.
 */
void alignedBufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
								int _frames )
{
	int f;
	for( f = 0; f < _frames; f += 8 )
	{
		/* first group of four frames */
		_dst[f+0][0] += _src[f+0][0];
		_dst[f+0][1] += _src[f+0][1];
		_dst[f+1][0] += _src[f+1][0];
		_dst[f+1][1] += _src[f+1][1];
		_dst[f+2][0] += _src[f+2][0];
		_dst[f+2][1] += _src[f+2][1];
		_dst[f+3][0] += _src[f+3][0];
		_dst[f+3][1] += _src[f+3][1];
		/* second group of four frames */
		_dst[f+4][0] += _src[f+4][0];
		_dst[f+4][1] += _src[f+4][1];
		_dst[f+5][0] += _src[f+5][0];
		_dst[f+5][1] += _src[f+5][1];
		_dst[f+6][0] += _src[f+6][0];
		_dst[f+6][1] += _src[f+6][1];
		_dst[f+7][0] += _src[f+7][0];
		_dst[f+7][1] += _src[f+7][1];
	}
}
|
||||
|
||||
|
||||
|
||||
void alignedBufMixLRCoeffSSE( sampleFrameA * RP _dst,
|
||||
const sampleFrameA * RP _src,
|
||||
float _left, float _right, int _frames )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < _frames; )
|
||||
{
|
||||
_dst[i+0][0] += _src[i+0][0]*_left;
|
||||
_dst[i+0][1] += _src[i+0][1]*_right;
|
||||
_dst[i+1][0] += _src[i+1][0]*_left;
|
||||
_dst[i+1][1] += _src[i+1][1]*_right;
|
||||
_dst[i+2][0] += _src[i+2][0]*_left;
|
||||
_dst[i+2][1] += _src[i+2][1]*_right;
|
||||
_dst[i+3][0] += _src[i+3][0]*_left;
|
||||
_dst[i+3][1] += _src[i+3][1]*_right;
|
||||
i += 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Adds _src onto _dst for _frames frames, scaling channel 0 by _left and
 * channel 1 by _right. An odd frame count is reduced to an even one by
 * handling a single frame up front; the rest is processed in pairs.
 */
void unalignedBufMixLRCoeffSSE( sampleFrame * RP _dst, const sampleFrame * RP _src,
					const float _left,
					const float _right,
					int _frames )
{
	int f;

	/* peel off one frame so that an even number remains */
	if( unlikely( _frames % 2 ) )
	{
		_dst[0][0] += _src[0][0] * _left;
		_dst[0][1] += _src[0][1] * _right;
		++_src;
		++_dst;
		--_frames;
	}

	/* two frames per iteration */
	for( f = 0; f < _frames; f += 2 )
	{
		_dst[f+0][0] += _src[f+0][0]*_left;
		_dst[f+0][1] += _src[f+0][1]*_right;
		_dst[f+1][0] += _src[f+1][0]*_left;
		_dst[f+1][1] += _src[f+1][1]*_right;
	}
}
|
||||
|
||||
|
||||
|
||||
void alignedBufWetDryMixSSE( sampleFrameA * RP _dst,
|
||||
const sampleFrameA * RP _src,
|
||||
float _wet, float _dry, int _frames )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < _frames; )
|
||||
{
|
||||
_dst[i+0][0] = _dst[i+0][0]*_dry + _src[i+0][0]*_wet;
|
||||
_dst[i+0][1] = _dst[i+0][1]*_dry + _src[i+0][1]*_wet;
|
||||
_dst[i+1][0] = _dst[i+1][0]*_dry + _src[i+1][0]*_wet;
|
||||
_dst[i+1][1] = _dst[i+1][1]*_dry + _src[i+1][1]*_wet;
|
||||
_dst[i+2][0] = _dst[i+2][0]*_dry + _src[i+2][0]*_wet;
|
||||
_dst[i+2][1] = _dst[i+2][1]*_dry + _src[i+2][1]*_wet;
|
||||
_dst[i+3][0] = _dst[i+3][0]*_dry + _src[i+3][0]*_wet;
|
||||
_dst[i+3][1] = _dst[i+3][1]*_dry + _src[i+3][1]*_wet;
|
||||
i += 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void alignedBufWetDryMixSplittedSSE( sampleFrameA * RP _dst,
|
||||
const float * RP _left,
|
||||
const float * RP _right,
|
||||
float _wet, float _dry, int _frames )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < _frames; )
|
||||
{
|
||||
_dst[i+0][0] = _dst[i+0][0]*_dry + _left[i+0]*_wet;
|
||||
_dst[i+0][1] = _dst[i+0][1]*_dry + _right[i+0]*_wet;
|
||||
_dst[i+1][0] = _dst[i+1][0]*_dry + _left[i+1]*_wet;
|
||||
_dst[i+1][1] = _dst[i+1][1]*_dry + _right[i+1]*_wet;
|
||||
i += 2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef BUILD_SSE2
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*
 * Copies _size bytes from _src to _dst in blocks of four __m128i values
 * (64 bytes) using _mm_load_si128/_mm_store_si128. Bytes beyond the last
 * complete block are not copied.
 */
void alignedMemCpySSE2( void * RP _dst, const void * RP _src, int _size )
{
	__m128i * out = (__m128i *) _dst;
	__m128i * in = (__m128i *) _src;
	int blocks_left = _size / ( sizeof( __m128i ) * 4 );

	for( ; blocks_left > 0; --blocks_left )
	{
		_mm_store_si128( out+0, _mm_load_si128( in+0 ) );
		_mm_store_si128( out+1, _mm_load_si128( in+1 ) );
		_mm_store_si128( out+2, _mm_load_si128( in+2 ) );
		_mm_store_si128( out+3, _mm_load_si128( in+3 ) );
		in += 4;
		out += 4;
	}
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Zeroes _size bytes at _dst in blocks of four __m128i values (64 bytes)
 * using _mm_store_si128. Bytes beyond the last complete block are left
 * untouched.
 */
void alignedMemClearSSE2( void * RP _dst, int _size )
{
	__m128i * out = (__m128i *) _dst;
	const __m128i zero = _mm_setzero_si128();
	int blocks_left = _size / ( sizeof( __m128i ) * 4 );

	for( ; blocks_left > 0; --blocks_left )
	{
		_mm_store_si128( out+0, zero );
		_mm_store_si128( out+1, zero );
		_mm_store_si128( out+2, zero );
		_mm_store_si128( out+3, zero );
		out += 4;
	}
}
|
||||
|
||||
|
||||
|
||||
/*
 * Converts _frames frames from _src into 16 bit integer frames in _dst.
 *
 * Each sample is multiplied by _master_gain * OUTPUT_SAMPLE_MULTIPLIER,
 * limited to [-32768, 32767] and stored. If _convert_endian is set, the two
 * bytes of every stored sample are swapped.
 *
 * The limiting is done on the float value, i.e. before the conversion to
 * int: converting a float that does not fit into an int is undefined, so
 * clamping only afterwards cannot be relied upon for very large values. For
 * values that do fit into an int the result is unchanged. NaN is not
 * treated specially.
 *
 * Returns _frames * DEFAULT_CHANNELS * BYTES_PER_INT_SAMPLE.
 */
int alignedConvertToS16SSE2( const sampleFrameA * RP _src,
					intSampleFrameA * RP _dst,
					const fpp_t _frames,
					const float _master_gain,
					const bool _convert_endian )
{
	fpp_t frame;
	const float f = _master_gain * OUTPUT_SAMPLE_MULTIPLIER;
	if( _convert_endian )
	{
		for( frame = 0; frame < _frames; ++frame )
		{
			float v1 = _src[frame][0] * f;
			float v2 = _src[frame][1] * f;
			int t1;
			int t2;

			v1 = unlikely( v1 > 32767.0f ) ? 32767.0f : v1;
			v1 = unlikely( v1 < -32768.0f ) ? -32768.0f : v1;
			t1 = (int) v1;
			_dst[frame][0] = ( t1 & 0x00ff) << 8 |
						( t1 & 0xff00 ) >> 8;

			v2 = unlikely( v2 > 32767.0f ) ? 32767.0f : v2;
			v2 = unlikely( v2 < -32768.0f ) ? -32768.0f : v2;
			t2 = (int) v2;
			_dst[frame][1] = ( t2 & 0x00ff) << 8 |
						( t2 & 0xff00 ) >> 8;
		}
	}
	else
	{
		for( frame = 0; frame < _frames; ++frame )
		{
			float v1 = _src[frame][0] * f;
			float v2 = _src[frame][1] * f;

			v1 = unlikely( v1 > 32767.0f ) ? 32767.0f : v1;
			v1 = unlikely( v1 < -32768.0f ) ? -32768.0f : v1;
			_dst[frame][0] = (int) v1;

			v2 = unlikely( v2 > 32767.0f ) ? 32767.0f : v2;
			v2 = unlikely( v2 < -32768.0f ) ? -32768.0f : v2;
			_dst[frame][1] = (int) v2;
		}
	}

	return _frames * DEFAULT_CHANNELS * BYTES_PER_INT_SAMPLE;
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
563
src/core/basic_ops_x86_64_sse.s
Normal file
563
src/core/basic_ops_x86_64_sse.s
Normal file
@@ -0,0 +1,563 @@
|
||||
.file "basic_ops_x86.c"
|
||||
.text
|
||||
.align 16
|
||||
.globl alignedMemCpySSE
|
||||
.type alignedMemCpySSE, @function
|
||||
alignedMemCpySSE:
|
||||
.LFB509:
|
||||
movslq %edx,%rdx
|
||||
shrq $6, %rdx
|
||||
testl %edx, %edx
|
||||
jle .L4
|
||||
leal -1(%rdx), %r9d
|
||||
xorl %eax, %eax
|
||||
mov %r9d, %r8d
|
||||
leaq 1(%r8), %rcx
|
||||
movq %rcx, %rdx
|
||||
salq $6, %rdx
|
||||
.align 16
|
||||
.L3:
|
||||
movaps (%rsi,%rax), %xmm0
|
||||
movaps %xmm0, (%rdi,%rax)
|
||||
movaps 16(%rsi,%rax), %xmm0
|
||||
movaps %xmm0, 16(%rdi,%rax)
|
||||
movaps 32(%rsi,%rax), %xmm0
|
||||
movaps %xmm0, 32(%rdi,%rax)
|
||||
movaps 48(%rsi,%rax), %xmm0
|
||||
movaps %xmm0, 48(%rdi,%rax)
|
||||
addq $64, %rax
|
||||
cmpq %rdx, %rax
|
||||
jne .L3
|
||||
.L4:
|
||||
rep
|
||||
ret
|
||||
.LFE509:
|
||||
.size alignedMemCpySSE, .-alignedMemCpySSE
|
||||
.align 16
|
||||
.globl alignedMemClearSSE
|
||||
.type alignedMemClearSSE, @function
|
||||
alignedMemClearSSE:
|
||||
.LFB510:
|
||||
movslq %esi,%rax
|
||||
shrq $6, %rax
|
||||
testl %eax, %eax
|
||||
jle .L10
|
||||
subl $1, %eax
|
||||
xorps %xmm0, %xmm0
|
||||
salq $6, %rax
|
||||
leaq 64(%rax,%rdi), %rax
|
||||
.align 16
|
||||
.L9:
|
||||
movaps %xmm0, (%rdi)
|
||||
movaps %xmm0, 16(%rdi)
|
||||
movaps %xmm0, 32(%rdi)
|
||||
movaps %xmm0, 48(%rdi)
|
||||
addq $64, %rdi
|
||||
cmpq %rax, %rdi
|
||||
jne .L9
|
||||
.L10:
|
||||
rep
|
||||
ret
|
||||
.LFE510:
|
||||
.size alignedMemClearSSE, .-alignedMemClearSSE
|
||||
.align 16
|
||||
.globl alignedBufApplyGainSSE
|
||||
.type alignedBufApplyGainSSE, @function
|
||||
alignedBufApplyGainSSE:
|
||||
.LFB511:
|
||||
testl %esi, %esi
|
||||
jle .L15
|
||||
subl $1, %esi
|
||||
shufps $0, %xmm0, %xmm0
|
||||
shrl $3, %esi
|
||||
xorl %eax, %eax
|
||||
leal 1(%rsi), %edx
|
||||
.align 16
|
||||
.L14:
|
||||
movaps %xmm0, %xmm3
|
||||
addl $1, %eax
|
||||
movaps %xmm0, %xmm2
|
||||
movaps %xmm0, %xmm1
|
||||
movaps %xmm0, %xmm4
|
||||
mulps 16(%rdi), %xmm3
|
||||
mulps 32(%rdi), %xmm2
|
||||
mulps 48(%rdi), %xmm1
|
||||
mulps (%rdi), %xmm4
|
||||
movaps %xmm3, 16(%rdi)
|
||||
movaps %xmm2, 32(%rdi)
|
||||
movaps %xmm1, 48(%rdi)
|
||||
movaps %xmm4, (%rdi)
|
||||
addq $64, %rdi
|
||||
cmpl %eax, %edx
|
||||
ja .L14
|
||||
.L15:
|
||||
rep
|
||||
ret
|
||||
.LFE511:
|
||||
.size alignedBufApplyGainSSE, .-alignedBufApplyGainSSE
|
||||
.align 16
|
||||
.globl alignedBufMixSSE
|
||||
.type alignedBufMixSSE, @function
|
||||
alignedBufMixSSE:
|
||||
.LFB512:
|
||||
testl %edx, %edx
|
||||
jle .L20
|
||||
subl $1, %edx
|
||||
xorl %eax, %eax
|
||||
shrl $3, %edx
|
||||
leal 1(%rdx), %ecx
|
||||
xorl %edx, %edx
|
||||
.align 16
|
||||
.L19:
|
||||
movaps 16(%rdi,%rax), %xmm2
|
||||
addl $1, %edx
|
||||
movaps 32(%rdi,%rax), %xmm1
|
||||
addps 16(%rsi,%rax), %xmm2
|
||||
movaps 48(%rdi,%rax), %xmm0
|
||||
addps 32(%rsi,%rax), %xmm1
|
||||
movaps (%rdi,%rax), %xmm3
|
||||
addps 48(%rsi,%rax), %xmm0
|
||||
addps (%rsi,%rax), %xmm3
|
||||
movaps %xmm2, 16(%rdi,%rax)
|
||||
movaps %xmm1, 32(%rdi,%rax)
|
||||
movaps %xmm0, 48(%rdi,%rax)
|
||||
movaps %xmm3, (%rdi,%rax)
|
||||
addq $64, %rax
|
||||
cmpl %edx, %ecx
|
||||
ja .L19
|
||||
.L20:
|
||||
rep
|
||||
ret
|
||||
.LFE512:
|
||||
.size alignedBufMixSSE, .-alignedBufMixSSE
|
||||
.align 16
|
||||
.globl alignedBufMixLRCoeffSSE
|
||||
.type alignedBufMixLRCoeffSSE, @function
|
||||
alignedBufMixLRCoeffSSE:
|
||||
.LFB513:
|
||||
testl %edx, %edx
|
||||
jle .L25
|
||||
unpcklps %xmm1, %xmm0
|
||||
subl $1, %edx
|
||||
shrl $2, %edx
|
||||
xorl %eax, %eax
|
||||
leal 1(%rdx), %ecx
|
||||
xorl %edx, %edx
|
||||
movlhps %xmm0, %xmm0
|
||||
.align 16
|
||||
.L24:
|
||||
movaps %xmm0, %xmm1
|
||||
addl $1, %edx
|
||||
movaps %xmm0, %xmm2
|
||||
mulps 16(%rsi,%rax), %xmm1
|
||||
mulps (%rsi,%rax), %xmm2
|
||||
addps 16(%rdi,%rax), %xmm1
|
||||
addps (%rdi,%rax), %xmm2
|
||||
movaps %xmm1, 16(%rdi,%rax)
|
||||
movaps %xmm2, (%rdi,%rax)
|
||||
addq $32, %rax
|
||||
cmpl %edx, %ecx
|
||||
ja .L24
|
||||
.L25:
|
||||
rep
|
||||
ret
|
||||
.LFE513:
|
||||
.size alignedBufMixLRCoeffSSE, .-alignedBufMixLRCoeffSSE
|
||||
.align 16
|
||||
.globl alignedBufWetDryMixSSE
|
||||
.type alignedBufWetDryMixSSE, @function
|
||||
alignedBufWetDryMixSSE:
|
||||
.LFB515:
|
||||
testl %edx, %edx
|
||||
jle .L30
|
||||
subl $1, %edx
|
||||
shufps $0, %xmm1, %xmm1
|
||||
shufps $0, %xmm0, %xmm0
|
||||
shrl $2, %edx
|
||||
leal 1(%rdx), %ecx
|
||||
xorl %eax, %eax
|
||||
xorl %edx, %edx
|
||||
.align 16
|
||||
.L29:
|
||||
movaps %xmm1, %xmm3
|
||||
addl $1, %edx
|
||||
movaps %xmm0, %xmm2
|
||||
mulps 16(%rdi,%rax), %xmm3
|
||||
movaps %xmm1, %xmm4
|
||||
mulps 16(%rsi,%rax), %xmm2
|
||||
mulps (%rdi,%rax), %xmm4
|
||||
addps %xmm3, %xmm2
|
||||
movaps %xmm0, %xmm3
|
||||
mulps (%rsi,%rax), %xmm3
|
||||
movaps %xmm2, 16(%rdi,%rax)
|
||||
addps %xmm4, %xmm3
|
||||
movaps %xmm3, (%rdi,%rax)
|
||||
addq $32, %rax
|
||||
cmpl %edx, %ecx
|
||||
ja .L29
|
||||
.L30:
|
||||
rep
|
||||
ret
|
||||
.LFE515:
|
||||
.size alignedBufWetDryMixSSE, .-alignedBufWetDryMixSSE
|
||||
.align 16
|
||||
.globl alignedBufWetDryMixSplittedSSE
|
||||
.type alignedBufWetDryMixSplittedSSE, @function
|
||||
alignedBufWetDryMixSplittedSSE:
|
||||
.LFB516:
|
||||
pushq %rbp
|
||||
.LCFI0:
|
||||
testl %ecx, %ecx
|
||||
pushq %rbx
|
||||
.LCFI1:
|
||||
jle .L39
|
||||
leal -1(%rcx), %ebx
|
||||
shrl %ebx
|
||||
addl $1, %ebx
|
||||
movl %ebx, %r11d
|
||||
shrl $2, %r11d
|
||||
cmpl $3, %ebx
|
||||
leal 0(,%r11,4), %ebp
|
||||
jbe .L40
|
||||
testl %ebp, %ebp
|
||||
jne .L34
|
||||
.L40:
|
||||
xorl %r9d, %r9d
|
||||
jmp .L36
|
||||
.align 16
|
||||
.L34:
|
||||
movaps %xmm1, %xmm2
|
||||
movq %rdi, %rax
|
||||
xorps %xmm6, %xmm6
|
||||
movq %rsi, %r9
|
||||
shufps $0, %xmm2, %xmm2
|
||||
movq %rdx, %r8
|
||||
xorl %r10d, %r10d
|
||||
movaps %xmm2, %xmm8
|
||||
movaps %xmm0, %xmm2
|
||||
shufps $0, %xmm2, %xmm2
|
||||
movaps %xmm2, %xmm7
|
||||
.align 16
|
||||
.L37:
|
||||
movaps (%rax), %xmm12
|
||||
addl $1, %r10d
|
||||
movaps %xmm6, %xmm3
|
||||
movaps 16(%rax), %xmm5
|
||||
movaps %xmm12, %xmm14
|
||||
movlps (%r8), %xmm3
|
||||
movaps 32(%rax), %xmm9
|
||||
shufps $136, %xmm5, %xmm14
|
||||
shufps $221, %xmm5, %xmm12
|
||||
movhps 8(%r8), %xmm3
|
||||
movaps 48(%rax), %xmm4
|
||||
movaps %xmm9, %xmm13
|
||||
movaps %xmm6, %xmm5
|
||||
shufps $221, %xmm4, %xmm9
|
||||
movlps (%r9), %xmm5
|
||||
shufps $136, %xmm4, %xmm13
|
||||
movaps %xmm6, %xmm4
|
||||
movhps 8(%r9), %xmm5
|
||||
movaps %xmm14, %xmm11
|
||||
movlps 16(%r9), %xmm4
|
||||
movaps %xmm12, %xmm15
|
||||
movaps %xmm5, %xmm2
|
||||
movhps 24(%r9), %xmm4
|
||||
shufps $136, %xmm13, %xmm11
|
||||
movaps %xmm3, %xmm10
|
||||
addq $32, %r9
|
||||
shufps $136, %xmm4, %xmm2
|
||||
mulps %xmm8, %xmm11
|
||||
mulps %xmm7, %xmm2
|
||||
shufps $221, %xmm13, %xmm14
|
||||
shufps $136, %xmm9, %xmm15
|
||||
shufps $221, %xmm4, %xmm5
|
||||
addps %xmm2, %xmm11
|
||||
movaps %xmm6, %xmm2
|
||||
shufps $221, %xmm9, %xmm12
|
||||
movlps 16(%r8), %xmm2
|
||||
mulps %xmm8, %xmm14
|
||||
movhps 24(%r8), %xmm2
|
||||
mulps %xmm7, %xmm5
|
||||
movaps %xmm11, %xmm9
|
||||
addq $32, %r8
|
||||
shufps $136, %xmm2, %xmm10
|
||||
shufps $221, %xmm2, %xmm3
|
||||
movaps %xmm14, %xmm4
|
||||
mulps %xmm8, %xmm15
|
||||
addps %xmm5, %xmm4
|
||||
mulps %xmm7, %xmm10
|
||||
movaps %xmm11, %xmm5
|
||||
mulps %xmm8, %xmm12
|
||||
mulps %xmm7, %xmm3
|
||||
addps %xmm15, %xmm10
|
||||
unpcklps %xmm4, %xmm9
|
||||
movaps %xmm12, %xmm2
|
||||
unpckhps %xmm4, %xmm5
|
||||
addps %xmm3, %xmm2
|
||||
movaps %xmm10, %xmm4
|
||||
movaps %xmm10, %xmm3
|
||||
unpcklps %xmm2, %xmm4
|
||||
unpckhps %xmm2, %xmm3
|
||||
movaps %xmm9, %xmm2
|
||||
unpcklps %xmm4, %xmm2
|
||||
unpckhps %xmm4, %xmm9
|
||||
movaps %xmm2, (%rax)
|
||||
movaps %xmm5, %xmm2
|
||||
unpckhps %xmm3, %xmm5
|
||||
unpcklps %xmm3, %xmm2
|
||||
movaps %xmm9, 16(%rax)
|
||||
movaps %xmm2, 32(%rax)
|
||||
movaps %xmm5, 48(%rax)
|
||||
addq $64, %rax
|
||||
cmpl %r10d, %r11d
|
||||
ja .L37
|
||||
cmpl %ebx, %ebp
|
||||
leal (%rbp,%rbp), %r9d
|
||||
je .L39
|
||||
.L36:
|
||||
movslq %r9d,%rax
|
||||
leaq 1(%rax), %rbx
|
||||
leaq 0(,%rax,4), %r10
|
||||
leaq (%rdi,%rax,8), %r8
|
||||
leaq (%rdi,%rbx,8), %rax
|
||||
salq $2, %rbx
|
||||
leaq (%rsi,%r10), %r11
|
||||
leaq (%rdx,%r10), %r10
|
||||
addq %rbx, %rsi
|
||||
addq %rbx, %rdx
|
||||
.align 16
|
||||
.L38:
|
||||
movaps %xmm1, %xmm3
|
||||
addl $2, %r9d
|
||||
movaps %xmm0, %xmm2
|
||||
mulss (%r8), %xmm3
|
||||
mulss (%r11), %xmm2
|
||||
addq $8, %r11
|
||||
addss %xmm3, %xmm2
|
||||
movaps %xmm1, %xmm3
|
||||
mulss 4(%r8), %xmm3
|
||||
movss %xmm2, (%r8)
|
||||
movaps %xmm0, %xmm2
|
||||
mulss (%r10), %xmm2
|
||||
addq $8, %r10
|
||||
addss %xmm3, %xmm2
|
||||
movaps %xmm1, %xmm3
|
||||
movss %xmm2, 4(%r8)
|
||||
movaps %xmm0, %xmm2
|
||||
addq $16, %r8
|
||||
mulss (%rax), %xmm3
|
||||
mulss (%rsi), %xmm2
|
||||
addq $8, %rsi
|
||||
addss %xmm3, %xmm2
|
||||
movaps %xmm1, %xmm3
|
||||
mulss 4(%rax), %xmm3
|
||||
movss %xmm2, (%rax)
|
||||
movaps %xmm0, %xmm2
|
||||
mulss (%rdx), %xmm2
|
||||
addq $8, %rdx
|
||||
addss %xmm3, %xmm2
|
||||
movss %xmm2, 4(%rax)
|
||||
addq $16, %rax
|
||||
cmpl %r9d, %ecx
|
||||
jg .L38
|
||||
.L39:
|
||||
popq %rbx
|
||||
popq %rbp
|
||||
ret
|
||||
.LFE516:
|
||||
.size alignedBufWetDryMixSplittedSSE, .-alignedBufWetDryMixSplittedSSE
|
||||
.align 16
|
||||
.globl unalignedBufMixLRCoeffSSE
|
||||
.type unalignedBufMixLRCoeffSSE, @function
|
||||
unalignedBufMixLRCoeffSSE:
|
||||
.LFB514:
|
||||
movl %edx, %eax
|
||||
shrl $31, %eax
|
||||
leal (%rdx,%rax), %ecx
|
||||
andl $1, %ecx
|
||||
cmpl %eax, %ecx
|
||||
jne .L52
|
||||
.L44:
|
||||
testl %edx, %edx
|
||||
jle .L49
|
||||
subl $1, %edx
|
||||
shrl %edx
|
||||
testb $15, %dil
|
||||
jne .L46
|
||||
unpcklps %xmm1, %xmm0
|
||||
addl $1, %edx
|
||||
xorps %xmm3, %xmm3
|
||||
xorl %eax, %eax
|
||||
movlhps %xmm0, %xmm0
|
||||
.align 16
|
||||
.L47:
|
||||
movaps %xmm3, %xmm2
|
||||
addl $1, %eax
|
||||
movaps %xmm3, %xmm1
|
||||
movlps (%rsi), %xmm2
|
||||
movlps (%rdi), %xmm1
|
||||
movhps 8(%rsi), %xmm2
|
||||
addq $16, %rsi
|
||||
movhps 8(%rdi), %xmm1
|
||||
mulps %xmm0, %xmm2
|
||||
addps %xmm2, %xmm1
|
||||
movaps %xmm1, (%rdi)
|
||||
addq $16, %rdi
|
||||
cmpl %edx, %eax
|
||||
jb .L47
|
||||
rep
|
||||
ret
|
||||
.align 16
|
||||
.L46:
|
||||
mov %edx, %edx
|
||||
xorl %eax, %eax
|
||||
addq $1, %rdx
|
||||
salq $4, %rdx
|
||||
.align 16
|
||||
.L48:
|
||||
movaps %xmm0, %xmm2
|
||||
mulss (%rsi,%rax), %xmm2
|
||||
addss (%rdi,%rax), %xmm2
|
||||
movss %xmm2, (%rdi,%rax)
|
||||
movaps %xmm1, %xmm2
|
||||
mulss 4(%rsi,%rax), %xmm2
|
||||
addss 4(%rdi,%rax), %xmm2
|
||||
movss %xmm2, 4(%rdi,%rax)
|
||||
movaps %xmm0, %xmm2
|
||||
mulss 8(%rsi,%rax), %xmm2
|
||||
addss 8(%rdi,%rax), %xmm2
|
||||
movss %xmm2, 8(%rdi,%rax)
|
||||
movaps %xmm1, %xmm2
|
||||
mulss 12(%rsi,%rax), %xmm2
|
||||
addss 12(%rdi,%rax), %xmm2
|
||||
movss %xmm2, 12(%rdi,%rax)
|
||||
addq $16, %rax
|
||||
cmpq %rdx, %rax
|
||||
jne .L48
|
||||
.L49:
|
||||
rep
|
||||
ret
|
||||
.L52:
|
||||
movaps %xmm0, %xmm2
|
||||
subl $1, %edx
|
||||
movss (%rdi), %xmm3
|
||||
mulss (%rsi), %xmm2
|
||||
addss %xmm3, %xmm2
|
||||
movss 4(%rdi), %xmm3
|
||||
movss %xmm2, (%rdi)
|
||||
movaps %xmm1, %xmm2
|
||||
mulss 4(%rsi), %xmm2
|
||||
addq $8, %rsi
|
||||
addss %xmm3, %xmm2
|
||||
movss %xmm2, 4(%rdi)
|
||||
addq $8, %rdi
|
||||
jmp .L44
|
||||
.LFE514:
|
||||
.size unalignedBufMixLRCoeffSSE, .-unalignedBufMixLRCoeffSSE
|
||||
.section .eh_frame,"aw",@progbits
|
||||
.Lframe1:
|
||||
.long .LECIE1-.LSCIE1
|
||||
.LSCIE1:
|
||||
.long 0x0
|
||||
.byte 0x1
|
||||
.string "zR"
|
||||
.byte 0x1
|
||||
.byte 0x78
|
||||
.byte 0x10
|
||||
.byte 0x1
|
||||
.byte 0x3
|
||||
.byte 0xc
|
||||
.byte 0x7
|
||||
.byte 0x8
|
||||
.byte 0x11
|
||||
.byte 0x10
|
||||
.byte 0x1
|
||||
.align 8
|
||||
.LECIE1:
|
||||
.LSFDE1:
|
||||
.long .LEFDE1-.LASFDE1
|
||||
.LASFDE1:
|
||||
.long .LASFDE1-.Lframe1
|
||||
.long .LFB509
|
||||
.long .LFE509-.LFB509
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE1:
|
||||
.LSFDE3:
|
||||
.long .LEFDE3-.LASFDE3
|
||||
.LASFDE3:
|
||||
.long .LASFDE3-.Lframe1
|
||||
.long .LFB510
|
||||
.long .LFE510-.LFB510
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE3:
|
||||
.LSFDE5:
|
||||
.long .LEFDE5-.LASFDE5
|
||||
.LASFDE5:
|
||||
.long .LASFDE5-.Lframe1
|
||||
.long .LFB511
|
||||
.long .LFE511-.LFB511
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE5:
|
||||
.LSFDE7:
|
||||
.long .LEFDE7-.LASFDE7
|
||||
.LASFDE7:
|
||||
.long .LASFDE7-.Lframe1
|
||||
.long .LFB512
|
||||
.long .LFE512-.LFB512
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE7:
|
||||
.LSFDE9:
|
||||
.long .LEFDE9-.LASFDE9
|
||||
.LASFDE9:
|
||||
.long .LASFDE9-.Lframe1
|
||||
.long .LFB513
|
||||
.long .LFE513-.LFB513
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE9:
|
||||
.LSFDE11:
|
||||
.long .LEFDE11-.LASFDE11
|
||||
.LASFDE11:
|
||||
.long .LASFDE11-.Lframe1
|
||||
.long .LFB515
|
||||
.long .LFE515-.LFB515
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE11:
|
||||
.LSFDE13:
|
||||
.long .LEFDE13-.LASFDE13
|
||||
.LASFDE13:
|
||||
.long .LASFDE13-.Lframe1
|
||||
.long .LFB516
|
||||
.long .LFE516-.LFB516
|
||||
.byte 0x0
|
||||
.byte 0x4
|
||||
.long .LCFI0-.LFB516
|
||||
.byte 0xe
|
||||
.byte 0x10
|
||||
.byte 0x4
|
||||
.long .LCFI1-.LCFI0
|
||||
.byte 0xe
|
||||
.byte 0x18
|
||||
.byte 0x11
|
||||
.byte 0x3
|
||||
.byte 0x3
|
||||
.byte 0x11
|
||||
.byte 0x6
|
||||
.byte 0x2
|
||||
.align 8
|
||||
.LEFDE13:
|
||||
.LSFDE15:
|
||||
.long .LEFDE15-.LASFDE15
|
||||
.LASFDE15:
|
||||
.long .LASFDE15-.Lframe1
|
||||
.long .LFB514
|
||||
.long .LFE514-.LFB514
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE15:
|
||||
.ident "GCC: (GNU) 4.4.0 20081110 (experimental)"
# Declare that this object does not need an executable stack. Without an
# empty .note.GNU-stack section the ELF linker assumes the opposite and
# marks the stack of the final binary as executable.
.section .note.GNU-stack,"",@progbits
|
||||
395
src/core/basic_ops_x86_64_sse2.s
Normal file
395
src/core/basic_ops_x86_64_sse2.s
Normal file
@@ -0,0 +1,395 @@
|
||||
.file "basic_ops_x86.c"
|
||||
.text
|
||||
.align 16
|
||||
.globl alignedMemCpySSE2
|
||||
.type alignedMemCpySSE2, @function
|
||||
alignedMemCpySSE2:
|
||||
.LFB509:
|
||||
movslq %edx,%rdx
|
||||
shrq $6, %rdx
|
||||
testl %edx, %edx
|
||||
jle .L4
|
||||
leal -1(%rdx), %r9d
|
||||
xorl %eax, %eax
|
||||
mov %r9d, %r8d
|
||||
leaq 1(%r8), %rcx
|
||||
movq %rcx, %rdx
|
||||
salq $6, %rdx
|
||||
.align 16
|
||||
.L3:
|
||||
movdqa (%rsi,%rax), %xmm0
|
||||
movdqa %xmm0, (%rdi,%rax)
|
||||
movdqa 16(%rsi,%rax), %xmm0
|
||||
movdqa %xmm0, 16(%rdi,%rax)
|
||||
movdqa 32(%rsi,%rax), %xmm0
|
||||
movdqa %xmm0, 32(%rdi,%rax)
|
||||
movdqa 48(%rsi,%rax), %xmm0
|
||||
movdqa %xmm0, 48(%rdi,%rax)
|
||||
addq $64, %rax
|
||||
cmpq %rdx, %rax
|
||||
jne .L3
|
||||
.L4:
|
||||
rep
|
||||
ret
|
||||
.LFE509:
|
||||
.size alignedMemCpySSE2, .-alignedMemCpySSE2
|
||||
.align 16
|
||||
.globl alignedMemClearSSE2
|
||||
.type alignedMemClearSSE2, @function
|
||||
alignedMemClearSSE2:
|
||||
.LFB510:
|
||||
movslq %esi,%rax
|
||||
shrq $6, %rax
|
||||
testl %eax, %eax
|
||||
jle .L10
|
||||
subl $1, %eax
|
||||
pxor %xmm0, %xmm0
|
||||
salq $6, %rax
|
||||
leaq 64(%rax,%rdi), %rax
|
||||
.align 16
|
||||
.L9:
|
||||
movdqa %xmm0, (%rdi)
|
||||
movdqa %xmm0, 16(%rdi)
|
||||
movdqa %xmm0, 32(%rdi)
|
||||
movdqa %xmm0, 48(%rdi)
|
||||
addq $64, %rdi
|
||||
cmpq %rax, %rdi
|
||||
jne .L9
|
||||
.L10:
|
||||
rep
|
||||
ret
|
||||
.LFE510:
|
||||
.size alignedMemClearSSE2, .-alignedMemClearSSE2
|
||||
.align 16
|
||||
.globl alignedConvertToS16SSE2
|
||||
.type alignedConvertToS16SSE2, @function
|
||||
alignedConvertToS16SSE2:
|
||||
.LFB511:
|
||||
pushq %rbp
|
||||
.LCFI0:
|
||||
testb %cl, %cl
|
||||
movl %edx, %eax
|
||||
mulss .LC0(%rip), %xmm0
|
||||
pushq %rbx
|
||||
.LCFI1:
|
||||
jne .L13
|
||||
testw %dx, %dx
|
||||
jle .L15
|
||||
movl %edx, %ebx
|
||||
shrw $2, %bx
|
||||
cmpw $3, %dx
|
||||
leal 0(,%rbx,4), %r8d
|
||||
ja .L33
|
||||
.L28:
|
||||
xorl %r8d, %r8d
|
||||
.align 16
|
||||
.L23:
|
||||
movswq %r8w,%rdx
|
||||
movl $32767, %ebx
|
||||
leaq (%rdi,%rdx,8), %rcx
|
||||
leaq (%rsi,%rdx,4), %rdx
|
||||
movl $-32768, %edi
|
||||
.align 16
|
||||
.L25:
|
||||
movaps %xmm0, %xmm1
|
||||
mulss (%rcx), %xmm1
|
||||
cvttss2si %xmm1, %esi
|
||||
movaps %xmm0, %xmm1
|
||||
mulss 4(%rcx), %xmm1
|
||||
cmpl $-32768, %esi
|
||||
cmovl %edi, %esi
|
||||
cmpl $32767, %esi
|
||||
cmovg %ebx, %esi
|
||||
movw %si, (%rdx)
|
||||
cvttss2si %xmm1, %esi
|
||||
cmpl $-32768, %esi
|
||||
cmovl %edi, %esi
|
||||
cmpl $32767, %esi
|
||||
cmovg %ebx, %esi
|
||||
addl $1, %r8d
|
||||
addq $8, %rcx
|
||||
movw %si, 2(%rdx)
|
||||
addq $4, %rdx
|
||||
cmpw %r8w, %ax
|
||||
jg .L25
|
||||
.L15:
|
||||
cwtl
|
||||
popq %rbx
|
||||
sall $2, %eax
|
||||
popq %rbp
|
||||
ret
|
||||
.align 16
|
||||
.L13:
|
||||
testw %dx, %dx
|
||||
jle .L15
|
||||
movl %edx, %ebx
|
||||
shrw $2, %bx
|
||||
cmpw $3, %dx
|
||||
leal 0(,%rbx,4), %r8d
|
||||
ja .L34
|
||||
.L27:
|
||||
xorl %r8d, %r8d
|
||||
.align 16
|
||||
.L18:
|
||||
movswq %r8w,%rdx
|
||||
leaq (%rdi,%rdx,8), %rcx
|
||||
leaq (%rsi,%rdx,4), %rdx
|
||||
movl $-32768, %edi
|
||||
movl $32767, %esi
|
||||
.align 16
|
||||
.L20:
|
||||
movaps %xmm0, %xmm1
|
||||
mulss (%rcx), %xmm1
|
||||
cvttss2si %xmm1, %ebx
|
||||
movaps %xmm0, %xmm1
|
||||
mulss 4(%rcx), %xmm1
|
||||
cmpl $-32768, %ebx
|
||||
cmovl %edi, %ebx
|
||||
cmpl $32767, %ebx
|
||||
cmovg %esi, %ebx
|
||||
movzbl %bh, %ebp
|
||||
sall $8, %ebx
|
||||
orl %ebp, %ebx
|
||||
movw %bx, (%rdx)
|
||||
cvttss2si %xmm1, %ebx
|
||||
cmpl $-32768, %ebx
|
||||
cmovl %edi, %ebx
|
||||
cmpl $32767, %ebx
|
||||
cmovg %esi, %ebx
|
||||
addl $1, %r8d
|
||||
addq $8, %rcx
|
||||
movzbl %bh, %ebp
|
||||
sall $8, %ebx
|
||||
orl %ebp, %ebx
|
||||
movw %bx, 2(%rdx)
|
||||
addq $4, %rdx
|
||||
cmpw %r8w, %ax
|
||||
jg .L20
|
||||
cwtl
|
||||
popq %rbx
|
||||
sall $2, %eax
|
||||
popq %rbp
|
||||
ret
|
||||
.align 16
|
||||
.L34:
|
||||
testw %r8w, %r8w
|
||||
je .L27
|
||||
movaps %xmm0, %xmm1
|
||||
movq %rdi, %rcx
|
||||
movdqa .LC1(%rip), %xmm2
|
||||
movq %rsi, %r10
|
||||
shufps $0, %xmm1, %xmm1
|
||||
xorl %r9d, %r9d
|
||||
movdqa .LC3(%rip), %xmm8
|
||||
movaps %xmm1, %xmm9
|
||||
movdqa .LC2(%rip), %xmm1
|
||||
.align 16
|
||||
.L19:
|
||||
movaps %xmm9, %xmm4
|
||||
addl $1, %r9d
|
||||
movaps %xmm9, %xmm3
|
||||
mulps (%rcx), %xmm4
|
||||
movdqa %xmm1, %xmm6
|
||||
mulps 16(%rcx), %xmm3
|
||||
addq $32, %rcx
|
||||
cvttps2dq %xmm4, %xmm4
|
||||
movdqa %xmm4, %xmm5
|
||||
pcmpgtd %xmm2, %xmm5
|
||||
cvttps2dq %xmm3, %xmm3
|
||||
pand %xmm5, %xmm4
|
||||
pandn %xmm2, %xmm5
|
||||
por %xmm5, %xmm4
|
||||
movdqa %xmm4, %xmm5
|
||||
pcmpgtd %xmm1, %xmm5
|
||||
pand %xmm5, %xmm6
|
||||
pandn %xmm4, %xmm5
|
||||
movdqa %xmm5, %xmm4
|
||||
movdqa %xmm3, %xmm5
|
||||
por %xmm6, %xmm4
|
||||
movdqa %xmm1, %xmm6
|
||||
pcmpgtd %xmm2, %xmm5
|
||||
pand %xmm5, %xmm3
|
||||
pandn %xmm2, %xmm5
|
||||
movdqa %xmm4, %xmm7
|
||||
pslld $8, %xmm4
|
||||
pand %xmm8, %xmm7
|
||||
por %xmm5, %xmm3
|
||||
psrad $8, %xmm7
|
||||
movdqa %xmm3, %xmm5
|
||||
pcmpgtd %xmm1, %xmm5
|
||||
pand %xmm5, %xmm6
|
||||
pandn %xmm3, %xmm5
|
||||
movdqa %xmm5, %xmm3
|
||||
por %xmm6, %xmm3
|
||||
movdqa %xmm7, %xmm6
|
||||
movdqa %xmm3, %xmm5
|
||||
pslld $8, %xmm3
|
||||
pand %xmm8, %xmm5
|
||||
psrad $8, %xmm5
|
||||
punpcklwd %xmm5, %xmm7
|
||||
punpckhwd %xmm5, %xmm6
|
||||
movdqa %xmm4, %xmm5
|
||||
punpcklwd %xmm3, %xmm4
|
||||
movdqa %xmm7, %xmm10
|
||||
punpckhwd %xmm3, %xmm5
|
||||
punpcklwd %xmm6, %xmm7
|
||||
punpckhwd %xmm6, %xmm10
|
||||
punpcklwd %xmm10, %xmm7
|
||||
movdqa %xmm4, %xmm10
|
||||
punpcklwd %xmm5, %xmm4
|
||||
punpckhwd %xmm5, %xmm10
|
||||
punpcklwd %xmm10, %xmm4
|
||||
por %xmm7, %xmm4
|
||||
movdqa %xmm4, (%r10)
|
||||
addq $16, %r10
|
||||
cmpw %r9w, %bx
|
||||
ja .L19
|
||||
cmpw %dx, %r8w
|
||||
jne .L18
|
||||
jmp .L15
|
||||
.align 16
|
||||
.L33:
|
||||
testw %r8w, %r8w
|
||||
je .L28
|
||||
movaps %xmm0, %xmm1
|
||||
movq %rdi, %rcx
|
||||
movdqa .LC1(%rip), %xmm2
|
||||
movq %rsi, %r10
|
||||
shufps $0, %xmm1, %xmm1
|
||||
xorl %r9d, %r9d
|
||||
movaps %xmm1, %xmm6
|
||||
movdqa .LC2(%rip), %xmm1
|
||||
.align 16
|
||||
.L24:
|
||||
movaps %xmm6, %xmm4
|
||||
addl $1, %r9d
|
||||
movaps %xmm6, %xmm3
|
||||
mulps (%rcx), %xmm4
|
||||
movdqa %xmm1, %xmm7
|
||||
mulps 16(%rcx), %xmm3
|
||||
addq $32, %rcx
|
||||
cvttps2dq %xmm4, %xmm4
|
||||
movdqa %xmm4, %xmm5
|
||||
pcmpgtd %xmm2, %xmm5
|
||||
cvttps2dq %xmm3, %xmm3
|
||||
pand %xmm5, %xmm4
|
||||
pandn %xmm2, %xmm5
|
||||
por %xmm5, %xmm4
|
||||
movdqa %xmm4, %xmm5
|
||||
pcmpgtd %xmm1, %xmm5
|
||||
pand %xmm5, %xmm7
|
||||
pandn %xmm4, %xmm5
|
||||
movdqa %xmm5, %xmm4
|
||||
movdqa %xmm3, %xmm5
|
||||
por %xmm7, %xmm4
|
||||
movdqa %xmm1, %xmm7
|
||||
pcmpgtd %xmm2, %xmm5
|
||||
pand %xmm5, %xmm3
|
||||
pandn %xmm2, %xmm5
|
||||
por %xmm5, %xmm3
|
||||
movdqa %xmm3, %xmm5
|
||||
pcmpgtd %xmm1, %xmm5
|
||||
pand %xmm5, %xmm7
|
||||
pandn %xmm3, %xmm5
|
||||
movdqa %xmm5, %xmm3
|
||||
movdqa %xmm4, %xmm5
|
||||
por %xmm7, %xmm3
|
||||
punpcklwd %xmm3, %xmm4
|
||||
punpckhwd %xmm3, %xmm5
|
||||
movdqa %xmm4, %xmm7
|
||||
punpcklwd %xmm5, %xmm4
|
||||
punpckhwd %xmm5, %xmm7
|
||||
punpcklwd %xmm7, %xmm4
|
||||
movdqa %xmm4, (%r10)
|
||||
addq $16, %r10
|
||||
cmpw %r9w, %bx
|
||||
ja .L24
|
||||
cmpw %r8w, %dx
|
||||
jne .L23
|
||||
jmp .L15
|
||||
.LFE511:
|
||||
.size alignedConvertToS16SSE2, .-alignedConvertToS16SSE2
|
||||
.section .rodata
|
||||
.align 4
|
||||
.LC0:
|
||||
.long 1191181824
|
||||
.align 16
|
||||
.LC1:
|
||||
.long -32768
|
||||
.long -32768
|
||||
.long -32768
|
||||
.long -32768
|
||||
.align 16
|
||||
.LC2:
|
||||
.long 32767
|
||||
.long 32767
|
||||
.long 32767
|
||||
.long 32767
|
||||
.align 16
|
||||
.LC3:
|
||||
.long 65280
|
||||
.long 65280
|
||||
.long 65280
|
||||
.long 65280
|
||||
.section .eh_frame,"aw",@progbits
|
||||
.Lframe1:
|
||||
.long .LECIE1-.LSCIE1
|
||||
.LSCIE1:
|
||||
.long 0x0
|
||||
.byte 0x1
|
||||
.string "zR"
|
||||
.byte 0x1
|
||||
.byte 0x78
|
||||
.byte 0x10
|
||||
.byte 0x1
|
||||
.byte 0x3
|
||||
.byte 0xc
|
||||
.byte 0x7
|
||||
.byte 0x8
|
||||
.byte 0x11
|
||||
.byte 0x10
|
||||
.byte 0x1
|
||||
.align 8
|
||||
.LECIE1:
|
||||
.LSFDE1:
|
||||
.long .LEFDE1-.LASFDE1
|
||||
.LASFDE1:
|
||||
.long .LASFDE1-.Lframe1
|
||||
.long .LFB509
|
||||
.long .LFE509-.LFB509
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE1:
|
||||
.LSFDE3:
|
||||
.long .LEFDE3-.LASFDE3
|
||||
.LASFDE3:
|
||||
.long .LASFDE3-.Lframe1
|
||||
.long .LFB510
|
||||
.long .LFE510-.LFB510
|
||||
.byte 0x0
|
||||
.align 8
|
||||
.LEFDE3:
|
||||
.LSFDE5:
|
||||
.long .LEFDE5-.LASFDE5
|
||||
.LASFDE5:
|
||||
.long .LASFDE5-.Lframe1
|
||||
.long .LFB511
|
||||
.long .LFE511-.LFB511
|
||||
.byte 0x0
|
||||
.byte 0x4
|
||||
.long .LCFI0-.LFB511
|
||||
.byte 0xe
|
||||
.byte 0x10
|
||||
.byte 0x4
|
||||
.long .LCFI1-.LCFI0
|
||||
.byte 0xe
|
||||
.byte 0x18
|
||||
.byte 0x11
|
||||
.byte 0x3
|
||||
.byte 0x3
|
||||
.byte 0x11
|
||||
.byte 0x6
|
||||
.byte 0x2
|
||||
.align 8
|
||||
.LEFDE5:
|
||||
.ident "GCC: (GNU) 4.4.0 20081110 (experimental)"
# Declare that this object does not need an executable stack. Without an
# empty .note.GNU-stack section the ELF linker assumes the opposite and
# marks the stack of the final binary as executable.
.section .note.GNU-stack,"",@progbits
|
||||
107
src/core/basic_ops_x86_mmx.s
Normal file
107
src/core/basic_ops_x86_mmx.s
Normal file
@@ -0,0 +1,107 @@
|
||||
.file "basic_ops_x86.c"
|
||||
.text
|
||||
# alignedMemCpyMMX: copy memory in 64-byte chunks through the MMX registers.
#
# Stack arguments as seen after the prologue: destination pointer at
# 120(%esp), source pointer at 124(%esp), byte count at 128(%esp).
# The byte count is shifted right by six, so only whole 64-byte chunks are
# copied; any remainder is left untouched.
#
# The x87/MMX state is saved into the scratch area at 4(%esp) before the MMX
# registers are used and is restored from that area before returning.
#
# NOTE(review): this listing is compiler output generated from
# basic_ops_x86.c. The inline asm behind the "75" line marker below must
# receive the same fsave -> frstor correction in the C source, otherwise
# regenerating this file will bring the defect back.
	.p2align 4,,15
.globl alignedMemCpyMMX
	.type alignedMemCpyMMX, @function
alignedMemCpyMMX:
	pushl %ebx
	subl $112, %esp
	movl 128(%esp), %ebx
	movl 124(%esp), %eax
	shrl $6, %ebx
# Save the caller's FPU state; fsave also reinitialises the FPU.
#APP
# 42 "/home/toby/development/svn/lmms-trunk/src/core/basic_ops_x86.c" 1
	 fsave 4(%esp); fwait

# 0 "" 2
# 44 "/home/toby/development/svn/lmms-trunk/src/core/basic_ops_x86.c" 1
	1: prefetchnta (%eax)
 prefetchnta 64(%eax)
 prefetchnta 128(%eax)
 prefetchnta 192(%eax)
 prefetchnta 256(%eax)

# 0 "" 2
#NO_APP
# Skip the copy loop entirely when there is no complete chunk.
	testl %ebx, %ebx
	je .L2
	movl 120(%esp), %ecx
	xorl %edx, %edx
	.p2align 4,,7
	.p2align 3
.L3:
# One chunk per iteration: load from (%eax), store to (%ecx).
#APP
# 53 "/home/toby/development/svn/lmms-trunk/src/core/basic_ops_x86.c" 1
	1: prefetchnta 320(%eax)
2: movq (%eax), %mm0
 movq 8(%eax), %mm1
 movq 16(%eax), %mm2
 movq 24(%eax), %mm3
 movq %mm0, (%ecx)
 movq %mm1, 8(%ecx)
 movq %mm2, 16(%ecx)
 movq %mm3, 24(%ecx)
 movq 32(%eax), %mm0
 movq 40(%eax), %mm1
 movq 48(%eax), %mm2
 movq 56(%eax), %mm3
 movq %mm0, 32(%ecx)
 movq %mm1, 40(%ecx)
 movq %mm2, 48(%ecx)
 movq %mm3, 56(%ecx)

# 0 "" 2
#NO_APP
	addl $1, %edx
	addl $64, %eax
	addl $64, %ecx
	cmpl %edx, %ebx
	jne .L3
.L2:
# Restore the FPU state saved on entry. The previous code executed fsave a
# second time here, which overwrote the saved image and left the FPU in its
# reset state instead of handing the caller's state back.
#APP
# 75 "/home/toby/development/svn/lmms-trunk/src/core/basic_ops_x86.c" 1
	 frstor 4(%esp)

# 0 "" 2
#NO_APP
	addl $112, %esp
	popl %ebx
	ret
	.size alignedMemCpyMMX, .-alignedMemCpyMMX
|
||||
.p2align 4,,15
|
||||
.globl alignedMemClearMMX
|
||||
.type alignedMemClearMMX, @function
|
||||
alignedMemClearMMX:
|
||||
movl 8(%esp), %ecx
|
||||
shrl $6, %ecx
|
||||
testl %ecx, %ecx
|
||||
je .L8
|
||||
movl 4(%esp), %edx
|
||||
xorl %eax, %eax
|
||||
pxor %mm0, %mm0
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L9:
|
||||
#APP
|
||||
# 90 "/home/toby/development/svn/lmms-trunk/src/core/basic_ops_x86.c" 1
|
||||
movq %mm0, (%edx)
|
||||
movq %mm0, 8(%edx)
|
||||
movq %mm0, 16(%edx)
|
||||
movq %mm0, 24(%edx)
|
||||
movq %mm0, 32(%edx)
|
||||
movq %mm0, 40(%edx)
|
||||
movq %mm0, 48(%edx)
|
||||
movq %mm0, 56(%edx)
|
||||
|
||||
# 0 "" 2
|
||||
#NO_APP
|
||||
addl $1, %eax
|
||||
addl $64, %edx
|
||||
cmpl %eax, %ecx
|
||||
jne .L9
|
||||
.L8:
|
||||
emms
|
||||
ret
|
||||
.size alignedMemClearMMX, .-alignedMemClearMMX
|
||||
.ident "GCC: (GNU) 4.4.0 20081110 (experimental)"
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
505
src/core/basic_ops_x86_sse.s
Normal file
505
src/core/basic_ops_x86_sse.s
Normal file
@@ -0,0 +1,505 @@
|
||||
.file "basic_ops_x86.c"
|
||||
.text
|
||||
# alignedMemCpySSE: copy memory in 64-byte chunks through %xmm0.
# Stack arguments (after the two pushes): 12(%esp) = destination,
# 16(%esp) = source, 20(%esp) = size, shifted right by 6 and therefore
# treated as a byte count. Only whole 64-byte chunks are copied; size % 64
# trailing bytes are not touched.
# movaps is used for every access, so both pointers must be 16-byte aligned.
.p2align 4,,15
|
||||
.globl alignedMemCpySSE
|
||||
.type alignedMemCpySSE, @function
|
||||
alignedMemCpySSE:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
# esi = chunk count, edx = destination, ecx = source
movl 20(%esp), %esi
|
||||
movl 12(%esp), %edx
|
||||
movl 16(%esp), %ecx
|
||||
shrl $6, %esi
|
||||
testl %esi, %esi
|
||||
je .L4
|
||||
# eax = byte offset into both buffers, ebx = chunks copied so far
xorl %eax, %eax
|
||||
xorl %ebx, %ebx
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L3:
|
||||
movaps (%ecx,%eax), %xmm0
|
||||
addl $1, %ebx
|
||||
movaps %xmm0, (%edx,%eax)
|
||||
movaps 16(%ecx,%eax), %xmm0
|
||||
movaps %xmm0, 16(%edx,%eax)
|
||||
movaps 32(%ecx,%eax), %xmm0
|
||||
movaps %xmm0, 32(%edx,%eax)
|
||||
movaps 48(%ecx,%eax), %xmm0
|
||||
movaps %xmm0, 48(%edx,%eax)
|
||||
addl $64, %eax
|
||||
cmpl %ebx, %esi
|
||||
jne .L3
|
||||
.L4:
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.size alignedMemCpySSE, .-alignedMemCpySSE
|
||||
# alignedMemClearSSE: zero a buffer in 64-byte chunks using SSE stores.
# Stack arguments: 4(%esp) = destination pointer, 8(%esp) = size, shifted
# right by 6 and therefore treated as a byte count. Only whole 64-byte
# chunks are written; size % 64 trailing bytes are left untouched.
# movaps stores require the destination to be 16-byte aligned.
.p2align 4,,15
|
||||
.globl alignedMemClearSSE
|
||||
.type alignedMemClearSSE, @function
|
||||
alignedMemClearSSE:
|
||||
# ecx = number of 64-byte chunks
movl 8(%esp), %ecx
|
||||
shrl $6, %ecx
|
||||
testl %ecx, %ecx
|
||||
je .L10
|
||||
# eax = write cursor, xmm0 = zero, edx = chunk counter
movl 4(%esp), %eax
|
||||
xorps %xmm0, %xmm0
|
||||
xorl %edx, %edx
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L9:
|
||||
addl $1, %edx
|
||||
movaps %xmm0, (%eax)
|
||||
movaps %xmm0, 16(%eax)
|
||||
movaps %xmm0, 32(%eax)
|
||||
movaps %xmm0, 48(%eax)
|
||||
addl $64, %eax
|
||||
cmpl %edx, %ecx
|
||||
jne .L9
|
||||
.L10:
|
||||
# rep-prefixed ret, as emitted by the compiler for a return that is a branch target
rep
|
||||
ret
|
||||
.size alignedMemClearSSE, .-alignedMemClearSSE
|
||||
# alignedBufApplyGainSSE: multiply a float buffer in place by a scalar.
# Stack arguments: 4(%esp) = buffer pointer, 8(%esp) = float factor,
# 12(%esp) = signed element count. Returns immediately when count <= 0.
# The loop runs ((count - 1) >> 3) + 1 times and each pass scales 64 bytes
# (16 floats), so the count is effectively rounded up to a multiple of 8
# eight-byte units. The buffer must be 16-byte aligned (movaps/mulps memory
# operands).
.p2align 4,,15
|
||||
.globl alignedBufApplyGainSSE
|
||||
.type alignedBufApplyGainSSE, @function
|
||||
alignedBufApplyGainSSE:
|
||||
movl 12(%esp), %ecx
|
||||
testl %ecx, %ecx
|
||||
jle .L15
|
||||
# xmm0 = factor, eax = buffer cursor, ecx = pass count, edx = pass index
movss 8(%esp), %xmm0
|
||||
subl $1, %ecx
|
||||
movl 4(%esp), %eax
|
||||
shrl $3, %ecx
|
||||
xorl %edx, %edx
|
||||
addl $1, %ecx
|
||||
# broadcast the factor into all four lanes of xmm0
shufps $0, %xmm0, %xmm0
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L14:
|
||||
movaps %xmm0, %xmm3
|
||||
addl $1, %edx
|
||||
movaps %xmm0, %xmm2
|
||||
movaps %xmm0, %xmm1
|
||||
movaps %xmm0, %xmm4
|
||||
mulps 16(%eax), %xmm3
|
||||
mulps 32(%eax), %xmm2
|
||||
mulps 48(%eax), %xmm1
|
||||
movaps %xmm3, 16(%eax)
|
||||
mulps (%eax), %xmm4
|
||||
movaps %xmm2, 32(%eax)
|
||||
movaps %xmm1, 48(%eax)
|
||||
movaps %xmm4, (%eax)
|
||||
addl $64, %eax
|
||||
cmpl %edx, %ecx
|
||||
ja .L14
|
||||
.L15:
|
||||
rep
|
||||
ret
|
||||
.size alignedBufApplyGainSSE, .-alignedBufApplyGainSSE
|
||||
# alignedBufMixSSE: add one float buffer onto another (dst += src).
# Stack arguments (after the two pushes): 12(%esp) = destination,
# 16(%esp) = source, 20(%esp) = signed element count.
# Returns without touching memory when count <= 0. Otherwise the loop runs
# ((count - 1) >> 3) + 1 times, 64 bytes (16 floats) per pass, so the count
# is effectively rounded up to a multiple of 8 eight-byte units.
# Both pointers must be 16-byte aligned (movaps/addps memory operands).
.p2align 4,,15
|
||||
.globl alignedBufMixSSE
|
||||
.type alignedBufMixSSE, @function
|
||||
alignedBufMixSSE:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
# esi = count, edx = destination, ecx = source
movl 20(%esp), %esi
|
||||
movl 12(%esp), %edx
|
||||
movl 16(%esp), %ecx
|
||||
testl %esi, %esi
|
||||
jle .L20
|
||||
# esi becomes the pass count; eax = byte offset, ebx = pass index
subl $1, %esi
|
||||
xorl %eax, %eax
|
||||
shrl $3, %esi
|
||||
xorl %ebx, %ebx
|
||||
addl $1, %esi
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L19:
|
||||
movaps 16(%edx,%eax), %xmm2
|
||||
addl $1, %ebx
|
||||
movaps 32(%edx,%eax), %xmm1
|
||||
movaps 48(%edx,%eax), %xmm0
|
||||
movaps (%edx,%eax), %xmm3
|
||||
addps 16(%ecx,%eax), %xmm2
|
||||
addps 32(%ecx,%eax), %xmm1
|
||||
addps 48(%ecx,%eax), %xmm0
|
||||
addps (%ecx,%eax), %xmm3
|
||||
movaps %xmm2, 16(%edx,%eax)
|
||||
movaps %xmm3, (%edx,%eax)
|
||||
movaps %xmm1, 32(%edx,%eax)
|
||||
movaps %xmm0, 48(%edx,%eax)
|
||||
addl $64, %eax
|
||||
cmpl %ebx, %esi
|
||||
ja .L19
|
||||
.L20:
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.size alignedBufMixSSE, .-alignedBufMixSSE
|
||||
# alignedBufMixLRCoeffSSE: dst += src * coefficient, with two alternating
# coefficients (even float slots use the first, odd slots the second).
# Stack arguments (after the two pushes): 12(%esp) = destination,
# 16(%esp) = source, 20(%esp) = first float coefficient,
# 24(%esp) = second float coefficient, 28(%esp) = signed element count.
# Returns without touching memory when count <= 0. Otherwise the loop runs
# ((count - 1) >> 2) + 1 times, 32 bytes (8 floats) per pass, so the count is
# effectively rounded up to a multiple of 4 eight-byte units.
# Both pointers must be 16-byte aligned (movaps/mulps/addps memory operands).
.p2align 4,,15
|
||||
.globl alignedBufMixLRCoeffSSE
|
||||
.type alignedBufMixLRCoeffSSE, @function
|
||||
alignedBufMixLRCoeffSSE:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
# esi = count, edx = destination, ebx = source
movl 28(%esp), %esi
|
||||
movl 12(%esp), %edx
|
||||
movl 16(%esp), %ebx
|
||||
testl %esi, %esi
|
||||
jle .L25
|
||||
movss 24(%esp), %xmm0
|
||||
subl $1, %esi
|
||||
movss 20(%esp), %xmm1
|
||||
xorl %eax, %eax
|
||||
shrl $2, %esi
|
||||
xorl %ecx, %ecx
|
||||
addl $1, %esi
|
||||
# build xmm0 = { first, second, first, second }
unpcklps %xmm0, %xmm1
|
||||
movaps %xmm1, %xmm0
|
||||
movlhps %xmm1, %xmm0
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L24:
|
||||
movaps %xmm0, %xmm1
|
||||
addl $1, %ecx
|
||||
movaps %xmm0, %xmm2
|
||||
mulps 16(%ebx,%eax), %xmm1
|
||||
mulps (%ebx,%eax), %xmm2
|
||||
addps 16(%edx,%eax), %xmm1
|
||||
addps (%edx,%eax), %xmm2
|
||||
movaps %xmm1, 16(%edx,%eax)
|
||||
movaps %xmm2, (%edx,%eax)
|
||||
addl $32, %eax
|
||||
cmpl %ecx, %esi
|
||||
ja .L24
|
||||
.L25:
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.size alignedBufMixLRCoeffSSE, .-alignedBufMixLRCoeffSSE
|
||||
# alignedBufWetDryMixSSE: dst = dst * b + src * a, element-wise on floats.
# Stack arguments (after the two pushes): 12(%esp) = destination,
# 16(%esp) = source, 20(%esp) = float a (applied to the source),
# 24(%esp) = float b (applied to the destination), 28(%esp) = signed count.
# Returns without touching memory when count <= 0. Otherwise the loop runs
# ((count - 1) >> 2) + 1 times, 32 bytes (8 floats) per pass, so the count is
# effectively rounded up to a multiple of 4 eight-byte units.
# Both pointers must be 16-byte aligned (movaps/mulps memory operands).
.p2align 4,,15
|
||||
.globl alignedBufWetDryMixSSE
|
||||
.type alignedBufWetDryMixSSE, @function
|
||||
alignedBufWetDryMixSSE:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
# esi = count, edx = destination, ebx = source
movl 28(%esp), %esi
|
||||
movl 12(%esp), %edx
|
||||
movl 16(%esp), %ebx
|
||||
testl %esi, %esi
|
||||
jle .L30
|
||||
movss 24(%esp), %xmm1
|
||||
subl $1, %esi
|
||||
movss 20(%esp), %xmm0
|
||||
xorl %eax, %eax
|
||||
shrl $2, %esi
|
||||
xorl %ecx, %ecx
|
||||
# broadcast b into xmm1 and a into xmm0
shufps $0, %xmm1, %xmm1
|
||||
addl $1, %esi
|
||||
shufps $0, %xmm0, %xmm0
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L29:
|
||||
movaps %xmm1, %xmm3
|
||||
addl $1, %ecx
|
||||
movaps %xmm0, %xmm2
|
||||
movaps %xmm1, %xmm4
|
||||
mulps 16(%edx,%eax), %xmm3
|
||||
mulps 16(%ebx,%eax), %xmm2
|
||||
mulps (%edx,%eax), %xmm4
|
||||
addps %xmm3, %xmm2
|
||||
movaps %xmm0, %xmm3
|
||||
mulps (%ebx,%eax), %xmm3
|
||||
movaps %xmm2, 16(%edx,%eax)
|
||||
addps %xmm4, %xmm3
|
||||
movaps %xmm3, (%edx,%eax)
|
||||
addl $32, %eax
|
||||
cmpl %ecx, %esi
|
||||
ja .L29
|
||||
.L30:
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.size alignedBufWetDryMixSSE, .-alignedBufWetDryMixSSE
|
||||
# alignedBufWetDryMixSplittedSSE: wet/dry mix of an interleaved two-float
# buffer with two separate single-channel float arrays.
# Stack arguments (after four pushes and the 140-byte frame):
#   160(%esp) = interleaved destination (8 bytes per frame),
#   164(%esp) = float array mixed into the first float of every frame,
#   168(%esp) = float array mixed into the second float of every frame,
#   172(%esp) = float a (applied to the two source arrays),
#   176(%esp) = float b (applied to the destination),
#   180(%esp) = signed frame count.
# Per frame f, as spelled out by the scalar loop at .L38:
#   dst[f][0] = a * first[f]  + b * dst[f][0]
#   dst[f][1] = a * second[f] + b * dst[f][1]
# Frames are handled in pairs; a vector loop (.L37) covers eight frames per
# pass and the scalar loop (.L38) handles what is left.
.p2align 4,,15
|
||||
.globl alignedBufWetDryMixSplittedSSE
|
||||
.type alignedBufWetDryMixSplittedSSE, @function
|
||||
alignedBufWetDryMixSplittedSSE:
|
||||
pushl %ebp
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
subl $140, %esp
|
||||
movl 180(%esp), %eax
|
||||
movl 160(%esp), %edx
|
||||
movl 164(%esp), %esi
|
||||
movl 168(%esp), %ecx
|
||||
testl %eax, %eax
|
||||
movss 172(%esp), %xmm4
|
||||
movss 176(%esp), %xmm5
|
||||
jle .L39
|
||||
# 112(%esp) = number of frame pairs = ((count - 1) >> 1) + 1
# ebp       = number of vector passes (pairs >> 2)
# 116(%esp) = frame pairs covered by the vector loop (ebp * 4)
movl 180(%esp), %eax
|
||||
subl $1, %eax
|
||||
shrl %eax
|
||||
addl $1, %eax
|
||||
movl %eax, %ebp
|
||||
movl %eax, 112(%esp)
|
||||
shrl $2, %ebp
|
||||
cmpl $3, 112(%esp)
|
||||
leal 0(,%ebp,4), %eax
|
||||
movl %eax, 116(%esp)
|
||||
jbe .L40
|
||||
testl %eax, %eax
|
||||
jne .L34
|
||||
.L40:
|
||||
# too few frame pairs for the vector loop: run the scalar loop from frame 0
xorl %edi, %edi
|
||||
jmp .L36
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L34:
|
||||
# vector setup: broadcast a into 32(%esp) and b into xmm4; the scalar values
# of a and b are parked at 120(%esp)/124(%esp) for the scalar tail
movaps %xmm4, %xmm2
|
||||
xorps %xmm6, %xmm6
|
||||
shufps $0, %xmm2, %xmm2
|
||||
movaps %xmm5, %xmm1
|
||||
movl %esi, %ebx
|
||||
shufps $0, %xmm1, %xmm1
|
||||
movaps %xmm2, 32(%esp)
|
||||
xorl %eax, %eax
|
||||
xorl %edi, %edi
|
||||
movss %xmm5, 124(%esp)
|
||||
movss %xmm4, 120(%esp)
|
||||
movaps %xmm1, %xmm4
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L37:
|
||||
# one pass: 64 bytes of the interleaved buffer (aligned loads/stores) plus
# 32 bytes from each single-channel array (movlps/movhps loads)
movaps 16(%edx,%eax,2), %xmm3
|
||||
addl $1, %edi
|
||||
movaps (%edx,%eax,2), %xmm2
|
||||
movaps 48(%edx,%eax,2), %xmm0
|
||||
movaps %xmm2, %xmm5
|
||||
shufps $221, %xmm3, %xmm2
|
||||
movaps 32(%edx,%eax,2), %xmm1
|
||||
shufps $136, %xmm3, %xmm5
|
||||
movaps %xmm2, 96(%esp)
|
||||
movaps %xmm1, %xmm7
|
||||
shufps $221, %xmm0, %xmm1
|
||||
shufps $136, %xmm0, %xmm7
|
||||
movaps %xmm1, 64(%esp)
|
||||
movaps %xmm6, %xmm3
|
||||
movaps %xmm5, (%esp)
|
||||
shufps $136, %xmm7, %xmm5
|
||||
movlps (%ebx), %xmm3
|
||||
movaps %xmm6, %xmm2
|
||||
movhps 8(%ebx), %xmm3
|
||||
movaps %xmm7, 80(%esp)
|
||||
movlps 16(%ebx), %xmm2
|
||||
movhps 24(%ebx), %xmm2
|
||||
movaps 96(%esp), %xmm7
|
||||
addl $32, %ebx
|
||||
movaps %xmm3, %xmm0
|
||||
shufps $221, %xmm2, %xmm3
|
||||
shufps $136, %xmm2, %xmm0
|
||||
shufps $136, 64(%esp), %xmm7
|
||||
mulps 32(%esp), %xmm0
|
||||
movaps %xmm6, %xmm1
|
||||
movlps (%ecx,%eax), %xmm1
|
||||
movhps 8(%ecx,%eax), %xmm1
|
||||
movaps 96(%esp), %xmm2
|
||||
mulps %xmm4, %xmm7
|
||||
shufps $221, 64(%esp), %xmm2
|
||||
mulps %xmm4, %xmm5
|
||||
mulps 32(%esp), %xmm3
|
||||
movaps %xmm7, 16(%esp)
|
||||
movaps %xmm1, %xmm7
|
||||
addps %xmm0, %xmm5
|
||||
movaps %xmm6, %xmm0
|
||||
movlps 16(%ecx,%eax), %xmm0
|
||||
movhps 24(%ecx,%eax), %xmm0
|
||||
shufps $136, %xmm0, %xmm7
|
||||
shufps $221, %xmm0, %xmm1
|
||||
mulps 32(%esp), %xmm7
|
||||
mulps 32(%esp), %xmm1
|
||||
mulps %xmm4, %xmm2
|
||||
movaps %xmm7, 48(%esp)
|
||||
movaps 16(%esp), %xmm7
|
||||
addps 48(%esp), %xmm7
|
||||
addps %xmm1, %xmm2
|
||||
movaps %xmm7, 16(%esp)
|
||||
movaps (%esp), %xmm7
|
||||
shufps $221, 80(%esp), %xmm7
|
||||
movaps 16(%esp), %xmm1
|
||||
mulps %xmm4, %xmm7
|
||||
movaps 16(%esp), %xmm0
|
||||
unpckhps %xmm2, %xmm1
|
||||
unpcklps %xmm2, %xmm0
|
||||
movaps %xmm1, %xmm2
|
||||
addps %xmm3, %xmm7
|
||||
movaps %xmm5, %xmm3
|
||||
unpcklps %xmm7, %xmm3
|
||||
unpckhps %xmm7, %xmm5
|
||||
movaps %xmm3, %xmm1
|
||||
unpckhps %xmm0, %xmm3
|
||||
unpcklps %xmm0, %xmm1
|
||||
movaps %xmm5, %xmm0
|
||||
unpckhps %xmm2, %xmm5
|
||||
unpcklps %xmm2, %xmm0
|
||||
movaps %xmm1, (%edx,%eax,2)
|
||||
movaps %xmm3, 16(%edx,%eax,2)
|
||||
movaps %xmm0, 32(%edx,%eax,2)
|
||||
movaps %xmm5, 48(%edx,%eax,2)
|
||||
addl $32, %eax
|
||||
cmpl %edi, %ebp
|
||||
ja .L37
|
||||
# vector loop done: restore scalar a/b, edi = first frame not yet processed;
# finished if the vector loop covered every frame pair
movl 116(%esp), %edi
|
||||
movl 112(%esp), %eax
|
||||
movss 120(%esp), %xmm4
|
||||
movss 124(%esp), %xmm5
|
||||
addl %edi, %edi
|
||||
cmpl %eax, 116(%esp)
|
||||
je .L39
|
||||
.L36:
|
||||
# scalar tail: ebx -> frame edi, edx -> frame edi + 1, eax = frame index,
# ebp = frames handled by this loop so far
leal (%edx,%edi,8), %ebx
|
||||
xorl %ebp, %ebp
|
||||
leal 8(%edx,%edi,8), %edx
|
||||
movl %edi, %eax
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L38:
|
||||
# two frames per pass, four scalar multiply-add results
movaps %xmm5, %xmm1
|
||||
addl $2, %ebp
|
||||
movaps %xmm4, %xmm0
|
||||
mulss (%ebx), %xmm1
|
||||
mulss (%esi,%eax,4), %xmm0
|
||||
addss %xmm1, %xmm0
|
||||
movaps %xmm5, %xmm1
|
||||
movss %xmm0, (%ebx)
|
||||
movaps %xmm4, %xmm0
|
||||
mulss 4(%ebx), %xmm1
|
||||
mulss (%ecx,%eax,4), %xmm0
|
||||
addss %xmm1, %xmm0
|
||||
movaps %xmm5, %xmm1
|
||||
movss %xmm0, 4(%ebx)
|
||||
addl $16, %ebx
|
||||
movaps %xmm4, %xmm0
|
||||
mulss (%edx), %xmm1
|
||||
mulss 4(%esi,%eax,4), %xmm0
|
||||
addss %xmm1, %xmm0
|
||||
movaps %xmm5, %xmm1
|
||||
movss %xmm0, (%edx)
|
||||
movaps %xmm4, %xmm0
|
||||
mulss 4(%edx), %xmm1
|
||||
mulss 4(%ecx,%eax,4), %xmm0
|
||||
leal (%edi,%ebp), %eax
|
||||
addss %xmm1, %xmm0
|
||||
movss %xmm0, 4(%edx)
|
||||
addl $16, %edx
|
||||
# continue while the frame count is greater than the next frame index
cmpl %eax, 180(%esp)
|
||||
jg .L38
|
||||
.L39:
|
||||
addl $140, %esp
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp
|
||||
ret
|
||||
.size alignedBufWetDryMixSplittedSSE, .-alignedBufWetDryMixSplittedSSE
|
||||
# unalignedBufMixLRCoeffSSE: dst += src * coefficient on interleaved
# two-float frames, with a separate coefficient per float slot.
# Stack arguments (after the two pushes): 12(%esp) = destination,
# 16(%esp) = source, 20(%esp) = float coefficient for the first slot,
# 24(%esp) = float coefficient for the second slot, 28(%esp) = signed
# frame count.
# Flow: an odd frame count is first reduced by one frame (.L52); the rest
# is processed two frames at a time, with SSE when the destination is
# 16-byte aligned at that point (.L47) and with scalar code otherwise (.L48).
# The source is only read with movlps/movhps or movss/mulss, never movaps.
.p2align 4,,15
|
||||
.globl unalignedBufMixLRCoeffSSE
|
||||
.type unalignedBufMixLRCoeffSSE, @function
|
||||
unalignedBufMixLRCoeffSSE:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
# esi = frame count, eax = destination, edx = source, xmm0/xmm3 = coefficients
movl 28(%esp), %esi
|
||||
movl 12(%esp), %eax
|
||||
movl 16(%esp), %edx
|
||||
movss 20(%esp), %xmm0
|
||||
# signed "count % 2 != 0" test: ecx = sign bit, ebx = (count + sign) & 1
movl %esi, %ecx
|
||||
shrl $31, %ecx
|
||||
leal (%esi,%ecx), %ebx
|
||||
andl $1, %ebx
|
||||
cmpl %ecx, %ebx
|
||||
movss 24(%esp), %xmm3
|
||||
jne .L52
|
||||
.L44:
|
||||
testl %esi, %esi
|
||||
jle .L49
|
||||
# ebx = (count - 1) >> 1; pick the vector path only for an aligned destination
leal -1(%esi), %ebx
|
||||
shrl %ebx
|
||||
testb $15, %al
|
||||
jne .L46
|
||||
# build xmm3 = { first, second, first, second }; ebx = number of passes
movaps %xmm0, %xmm1
|
||||
xorps %xmm2, %xmm2
|
||||
unpcklps %xmm3, %xmm1
|
||||
addl $1, %ebx
|
||||
xorl %ecx, %ecx
|
||||
movaps %xmm1, %xmm3
|
||||
movlhps %xmm1, %xmm3
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L47:
|
||||
# vector pass: 16 bytes (two frames); loads in 8-byte halves, aligned store
movaps %xmm2, %xmm1
|
||||
addl $1, %ecx
|
||||
movlps (%edx), %xmm1
|
||||
movhps 8(%edx), %xmm1
|
||||
movaps %xmm2, %xmm0
|
||||
movlps (%eax), %xmm0
|
||||
movhps 8(%eax), %xmm0
|
||||
addl $16, %edx
|
||||
mulps %xmm3, %xmm1
|
||||
addps %xmm1, %xmm0
|
||||
movaps %xmm0, (%eax)
|
||||
addl $16, %eax
|
||||
cmpl %ebx, %ecx
|
||||
jb .L47
|
||||
.L49:
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L46:
|
||||
# scalar path for a destination that is not 16-byte aligned; ecx = frame index
xorl %ecx, %ecx
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L48:
|
||||
# two frames per pass: four scalar multiply-adds
movaps %xmm0, %xmm1
|
||||
mulss (%edx,%ecx,8), %xmm1
|
||||
addss (%eax,%ecx,8), %xmm1
|
||||
movss %xmm1, (%eax,%ecx,8)
|
||||
movaps %xmm3, %xmm1
|
||||
mulss 4(%edx,%ecx,8), %xmm1
|
||||
addss 4(%eax,%ecx,8), %xmm1
|
||||
movss %xmm1, 4(%eax,%ecx,8)
|
||||
movaps %xmm0, %xmm1
|
||||
mulss 8(%edx,%ecx,8), %xmm1
|
||||
addss 8(%eax,%ecx,8), %xmm1
|
||||
movss %xmm1, 8(%eax,%ecx,8)
|
||||
movaps %xmm3, %xmm1
|
||||
mulss 12(%edx,%ecx,8), %xmm1
|
||||
addss 12(%eax,%ecx,8), %xmm1
|
||||
movss %xmm1, 12(%eax,%ecx,8)
|
||||
addl $2, %ecx
|
||||
cmpl %ecx, %esi
|
||||
jg .L48
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.L52:
|
||||
# odd frame count: mix a single frame with scalar code, advance both
# pointers by one frame (8 bytes), decrement the count and rejoin at .L44
movaps %xmm0, %xmm1
|
||||
subl $1, %esi
|
||||
movss (%eax), %xmm2
|
||||
mulss (%edx), %xmm1
|
||||
addss %xmm2, %xmm1
|
||||
movss 4(%eax), %xmm2
|
||||
movss %xmm1, (%eax)
|
||||
movaps %xmm3, %xmm1
|
||||
mulss 4(%edx), %xmm1
|
||||
addl $8, %edx
|
||||
addss %xmm2, %xmm1
|
||||
movss %xmm1, 4(%eax)
|
||||
addl $8, %eax
|
||||
jmp .L44
|
||||
.size unalignedBufMixLRCoeffSSE, .-unalignedBufMixLRCoeffSSE
|
||||
.ident "GCC: (GNU) 4.4.0 20081110 (experimental)"
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
349
src/core/basic_ops_x86_sse2.s
Normal file
349
src/core/basic_ops_x86_sse2.s
Normal file
@@ -0,0 +1,349 @@
|
||||
.file "basic_ops_x86.c"
|
||||
.text
|
||||
# alignedMemCpySSE2: copy memory in 64-byte chunks through %xmm0 using the
# SSE2 integer move movdqa.
# Stack arguments (after the two pushes): 12(%esp) = destination,
# 16(%esp) = source, 20(%esp) = size, shifted right by 6 and therefore
# treated as a byte count. Only whole 64-byte chunks are copied; size % 64
# trailing bytes are not touched.
# movdqa requires both pointers to be 16-byte aligned.
.p2align 4,,15
|
||||
.globl alignedMemCpySSE2
|
||||
.type alignedMemCpySSE2, @function
|
||||
alignedMemCpySSE2:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
# esi = chunk count, edx = destination, ecx = source
movl 20(%esp), %esi
|
||||
movl 12(%esp), %edx
|
||||
movl 16(%esp), %ecx
|
||||
shrl $6, %esi
|
||||
testl %esi, %esi
|
||||
je .L4
|
||||
# eax = byte offset into both buffers, ebx = chunks copied so far
xorl %eax, %eax
|
||||
xorl %ebx, %ebx
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L3:
|
||||
addl $1, %ebx
|
||||
movdqa (%ecx,%eax), %xmm0
|
||||
movdqa %xmm0, (%edx,%eax)
|
||||
movdqa 16(%ecx,%eax), %xmm0
|
||||
movdqa %xmm0, 16(%edx,%eax)
|
||||
movdqa 32(%ecx,%eax), %xmm0
|
||||
movdqa %xmm0, 32(%edx,%eax)
|
||||
movdqa 48(%ecx,%eax), %xmm0
|
||||
movdqa %xmm0, 48(%edx,%eax)
|
||||
addl $64, %eax
|
||||
cmpl %ebx, %esi
|
||||
jne .L3
|
||||
.L4:
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.size alignedMemCpySSE2, .-alignedMemCpySSE2
|
||||
# alignedMemClearSSE2: zero a buffer in 64-byte chunks using SSE2 integer
# stores (pxor/movdqa).
# Stack arguments: 4(%esp) = destination pointer, 8(%esp) = size, shifted
# right by 6 and therefore treated as a byte count. Only whole 64-byte
# chunks are written; size % 64 trailing bytes are left untouched.
# movdqa stores require the destination to be 16-byte aligned.
.p2align 4,,15
|
||||
.globl alignedMemClearSSE2
|
||||
.type alignedMemClearSSE2, @function
|
||||
alignedMemClearSSE2:
|
||||
# ecx = number of 64-byte chunks
movl 8(%esp), %ecx
|
||||
shrl $6, %ecx
|
||||
testl %ecx, %ecx
|
||||
je .L10
|
||||
# eax = write cursor, edx = chunk counter, xmm0 = zero
movl 4(%esp), %eax
|
||||
xorl %edx, %edx
|
||||
pxor %xmm0, %xmm0
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L9:
|
||||
addl $1, %edx
|
||||
movdqa %xmm0, (%eax)
|
||||
movdqa %xmm0, 16(%eax)
|
||||
movdqa %xmm0, 32(%eax)
|
||||
movdqa %xmm0, 48(%eax)
|
||||
addl $64, %eax
|
||||
cmpl %edx, %ecx
|
||||
jne .L9
|
||||
.L10:
|
||||
rep
|
||||
ret
|
||||
.size alignedMemClearSSE2, .-alignedMemClearSSE2
|
||||
# alignedConvertToS16SSE2: scale float samples, truncate them to integers,
# clamp to [-32768, 32767] and store them as 16-bit values, optionally with
# the two bytes of every 16-bit value swapped.
# Stack arguments (after four pushes and the 8-byte frame):
#   28(%esp) = source floats (8 bytes per frame),
#   32(%esp) = 16-bit output buffer (4 bytes per frame),
#   36(%esp) = frame count, handled as a 16-bit value throughout,
#   40(%esp) = float gain, multiplied with the constant at .LC0,
#   44(%esp) = byte flag; non-zero selects the byte-swapping paths.
# Returns the sign-extended 16-bit frame count times 4 in %eax.
# Each variant has a vector loop handling 4 frames per pass (taken when the
# count is above 3) and a scalar loop for the remaining frames.
.p2align 4,,15
|
||||
.globl alignedConvertToS16SSE2
|
||||
.type alignedConvertToS16SSE2, @function
|
||||
alignedConvertToS16SSE2:
|
||||
pushl %ebp
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
subl $8, %esp
|
||||
# eax/esi = frame count, edx = source, ebx = output, xmm4 = .LC0 * gain
movl 36(%esp), %eax
|
||||
movss .LC0, %xmm4
|
||||
cmpb $0, 44(%esp)
|
||||
movl 28(%esp), %edx
|
||||
movl 32(%esp), %ebx
|
||||
movl %eax, %esi
|
||||
mulss 40(%esp), %xmm4
|
||||
# flag set -> byte-swapping variant at .L13
jne .L13
|
||||
testw %ax, %ax
|
||||
jle .L15
|
||||
# di = vector passes (count >> 2), ebp = frames covered by them,
# 2(%esp) = saved 16-bit count
movl %eax, %edi
|
||||
shrw $2, %di
|
||||
cmpw $3, %ax
|
||||
movw %ax, 2(%esp)
|
||||
leal 0(,%edi,4), %ebp
|
||||
ja .L33
|
||||
.L28:
|
||||
xorl %ebp, %ebp
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L23:
|
||||
# scalar loop setup (no byte swap), starting at frame ebp:
# edx -> source frame, eax -> output frame, edi/ebx = clamp limits
movswl %bp,%eax
|
||||
movl $-32768, %edi
|
||||
leal (%edx,%eax,8), %edx
|
||||
leal (%ebx,%eax,4), %eax
|
||||
movl $32767, %ebx
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L25:
|
||||
# one frame: scale, truncate (cvttss2si), clamp with cmov, store two words
movaps %xmm4, %xmm0
|
||||
mulss (%edx), %xmm0
|
||||
cvttss2si %xmm0, %ecx
|
||||
movaps %xmm4, %xmm0
|
||||
mulss 4(%edx), %xmm0
|
||||
cmpl $-32768, %ecx
|
||||
cmovl %edi, %ecx
|
||||
cmpl $32767, %ecx
|
||||
cmovg %ebx, %ecx
|
||||
movw %cx, (%eax)
|
||||
cvttss2si %xmm0, %ecx
|
||||
cmpl $-32768, %ecx
|
||||
cmovl %edi, %ecx
|
||||
cmpl $32767, %ecx
|
||||
cmovg %ebx, %ecx
|
||||
addl $1, %ebp
|
||||
movw %cx, 2(%eax)
|
||||
addl $8, %edx
|
||||
addl $4, %eax
|
||||
cmpw %bp, %si
|
||||
jg .L25
|
||||
.L15:
|
||||
# common exit: return value = sign-extended 16-bit frame count * 4
movswl %si,%esi
|
||||
addl $8, %esp
|
||||
leal 0(,%esi,4), %eax
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp
|
||||
ret
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L13:
|
||||
# byte-swapping variant: bp = vector passes, eax = frames covered by them
testw %ax, %ax
|
||||
jle .L15
|
||||
movl %eax, %ebp
|
||||
shrw $2, %bp
|
||||
cmpw $3, %si
|
||||
movw %ax, 2(%esp)
|
||||
leal 0(,%ebp,4), %eax
|
||||
ja .L34
|
||||
.L27:
|
||||
xorl %eax, %eax
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L18:
|
||||
# scalar loop setup (byte swap), starting at frame eax:
# ecx -> source frame, edx -> output frame
movswl %ax,%edi
|
||||
leal (%edx,%edi,8), %ecx
|
||||
leal (%ebx,%edi,4), %edx
|
||||
movl $-32768, %edi
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L20:
|
||||
# one frame: scale, truncate, clamp, then swap the two low bytes
# (movzbl %bh / sall $8 / orl) before each 16-bit store
movaps %xmm4, %xmm0
|
||||
movl $32767, %ebp
|
||||
mulss (%ecx), %xmm0
|
||||
cvttss2si %xmm0, %ebx
|
||||
movaps %xmm4, %xmm0
|
||||
mulss 4(%ecx), %xmm0
|
||||
cmpl $-32768, %ebx
|
||||
cmovl %edi, %ebx
|
||||
cmpl $32767, %ebx
|
||||
cmovg %ebp, %ebx
|
||||
movzbl %bh, %ebp
|
||||
sall $8, %ebx
|
||||
orl %ebp, %ebx
|
||||
movl $32767, %ebp
|
||||
movw %bx, (%edx)
|
||||
cvttss2si %xmm0, %ebx
|
||||
cmpl $-32768, %ebx
|
||||
cmovl %edi, %ebx
|
||||
cmpl $32767, %ebx
|
||||
cmovg %ebp, %ebx
|
||||
addl $1, %eax
|
||||
movzbl %bh, %ebp
|
||||
addl $8, %ecx
|
||||
sall $8, %ebx
|
||||
orl %ebp, %ebx
|
||||
movw %bx, 2(%edx)
|
||||
addl $4, %edx
|
||||
cmpw %ax, %si
|
||||
jg .L20
|
||||
jmp .L15
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L34:
|
||||
# vector loop setup (byte swap): xmm7 = broadcast scale factor,
# xmm1 = lower clamp (.LC1), xmm0 = upper clamp (.LC2);
# the scalar factor is parked at 4(%esp) for the scalar tail
testw %ax, %ax
|
||||
je .L27
|
||||
movaps %xmm4, %xmm0
|
||||
xorl %ecx, %ecx
|
||||
movdqa .LC1, %xmm1
|
||||
movss %xmm4, 4(%esp)
|
||||
shufps $0, %xmm0, %xmm0
|
||||
xorl %edi, %edi
|
||||
movaps %xmm0, %xmm7
|
||||
movdqa .LC2, %xmm0
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L19:
|
||||
# one pass: 32 source bytes (4 frames) -> 16 output bytes.
# Clamping uses pcmpgtd masks with pand/pandn/por; the byte swap masks with
# .LC3, shifts right and left by 8 and ORs the packed halves together.
movaps %xmm7, %xmm3
|
||||
movdqa %xmm0, %xmm5
|
||||
movdqa %xmm0, %xmm6
|
||||
movaps %xmm7, %xmm2
|
||||
addl $1, %edi
|
||||
mulps (%edx,%ecx,2), %xmm3
|
||||
mulps 16(%edx,%ecx,2), %xmm2
|
||||
cvttps2dq %xmm3, %xmm3
|
||||
movdqa %xmm3, %xmm4
|
||||
pcmpgtd %xmm1, %xmm4
|
||||
pand %xmm4, %xmm3
|
||||
pandn %xmm1, %xmm4
|
||||
por %xmm4, %xmm3
|
||||
cvttps2dq %xmm2, %xmm2
|
||||
movdqa %xmm3, %xmm4
|
||||
pcmpgtd %xmm0, %xmm4
|
||||
pand %xmm4, %xmm5
|
||||
pandn %xmm3, %xmm4
|
||||
movdqa %xmm4, %xmm3
|
||||
movdqa %xmm2, %xmm4
|
||||
por %xmm5, %xmm3
|
||||
pcmpgtd %xmm1, %xmm4
|
||||
movdqa .LC3, %xmm5
|
||||
pand %xmm4, %xmm2
|
||||
pand %xmm3, %xmm5
|
||||
pandn %xmm1, %xmm4
|
||||
psrad $8, %xmm5
|
||||
por %xmm4, %xmm2
|
||||
pslld $8, %xmm3
|
||||
movdqa %xmm2, %xmm4
|
||||
pcmpgtd %xmm0, %xmm4
|
||||
pand %xmm4, %xmm6
|
||||
pandn %xmm2, %xmm4
|
||||
movdqa %xmm4, %xmm2
|
||||
por %xmm6, %xmm2
|
||||
movdqa .LC3, %xmm6
|
||||
pand %xmm2, %xmm6
|
||||
pslld $8, %xmm2
|
||||
psrad $8, %xmm6
|
||||
movdqa %xmm5, %xmm4
|
||||
punpcklwd %xmm6, %xmm5
|
||||
punpckhwd %xmm6, %xmm4
|
||||
movdqa %xmm5, %xmm6
|
||||
punpcklwd %xmm4, %xmm5
|
||||
punpckhwd %xmm4, %xmm6
|
||||
movdqa %xmm3, %xmm4
|
||||
punpcklwd %xmm6, %xmm5
|
||||
punpckhwd %xmm2, %xmm4
|
||||
punpcklwd %xmm2, %xmm3
|
||||
movdqa %xmm3, %xmm6
|
||||
punpcklwd %xmm4, %xmm3
|
||||
punpckhwd %xmm4, %xmm6
|
||||
punpcklwd %xmm6, %xmm3
|
||||
por %xmm3, %xmm5
|
||||
movdqa %xmm5, (%ebx,%ecx)
|
||||
addl $16, %ecx
|
||||
cmpw %di, %bp
|
||||
ja .L19
|
||||
# frames left over after the vector loop go through the scalar loop at .L18
cmpw 2(%esp), %ax
|
||||
movss 4(%esp), %xmm4
|
||||
jne .L18
|
||||
jmp .L15
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L33:
|
||||
# vector loop setup (no byte swap): xmm6 = broadcast scale factor,
# xmm1 = lower clamp (.LC1), xmm0 = upper clamp (.LC2)
testw %bp, %bp
|
||||
.p2align 4,,3
|
||||
.p2align 3
|
||||
je .L28
|
||||
movaps %xmm4, %xmm0
|
||||
xorl %eax, %eax
|
||||
movdqa .LC1, %xmm1
|
||||
shufps $0, %xmm0, %xmm0
|
||||
xorl %ecx, %ecx
|
||||
movaps %xmm0, %xmm6
|
||||
movdqa .LC2, %xmm0
|
||||
.p2align 4,,7
|
||||
.p2align 3
|
||||
.L24:
|
||||
# one pass: 32 source bytes (4 frames) -> 16 output bytes, clamped with
# pcmpgtd masks and packed with punpck[lh]wd
movaps %xmm6, %xmm3
|
||||
addl $1, %ecx
|
||||
movdqa %xmm0, %xmm7
|
||||
movaps %xmm6, %xmm2
|
||||
mulps (%edx,%eax,2), %xmm3
|
||||
mulps 16(%edx,%eax,2), %xmm2
|
||||
cvttps2dq %xmm3, %xmm3
|
||||
movdqa %xmm3, %xmm5
|
||||
pcmpgtd %xmm1, %xmm5
|
||||
pand %xmm5, %xmm3
|
||||
pandn %xmm1, %xmm5
|
||||
por %xmm5, %xmm3
|
||||
cvttps2dq %xmm2, %xmm2
|
||||
movdqa %xmm3, %xmm5
|
||||
pcmpgtd %xmm0, %xmm5
|
||||
pand %xmm5, %xmm7
|
||||
pandn %xmm3, %xmm5
|
||||
movdqa %xmm5, %xmm3
|
||||
movdqa %xmm2, %xmm5
|
||||
por %xmm7, %xmm3
|
||||
pcmpgtd %xmm1, %xmm5
|
||||
movdqa %xmm0, %xmm7
|
||||
pand %xmm5, %xmm2
|
||||
pandn %xmm1, %xmm5
|
||||
por %xmm5, %xmm2
|
||||
movdqa %xmm2, %xmm5
|
||||
pcmpgtd %xmm0, %xmm5
|
||||
pand %xmm5, %xmm7
|
||||
pandn %xmm2, %xmm5
|
||||
movdqa %xmm5, %xmm2
|
||||
movdqa %xmm3, %xmm5
|
||||
por %xmm7, %xmm2
|
||||
punpckhwd %xmm2, %xmm5
|
||||
punpcklwd %xmm2, %xmm3
|
||||
movdqa %xmm3, %xmm7
|
||||
punpcklwd %xmm5, %xmm3
|
||||
punpckhwd %xmm5, %xmm7
|
||||
punpcklwd %xmm7, %xmm3
|
||||
movdqa %xmm3, (%ebx,%eax)
|
||||
addl $16, %eax
|
||||
cmpw %cx, %di
|
||||
ja .L24
|
||||
# frames left over after the vector loop go through the scalar loop at .L23
cmpw %bp, 2(%esp)
|
||||
jne .L23
|
||||
jmp .L15
|
||||
.size alignedConvertToS16SSE2, .-alignedConvertToS16SSE2
|
||||
# Constant pool referenced by alignedConvertToS16SSE2.
.section .rodata.cst4,"aM",@progbits,4
|
||||
.align 4
|
||||
# .LC0: IEEE-754 single bit pattern 0x46FFFE00 = 32767.0f, multiplied with the gain
.LC0:
|
||||
.long 1191181824
|
||||
.section .rodata.cst16,"aM",@progbits,16
|
||||
.align 16
|
||||
# .LC1: four 32-bit lanes of -32768, the lower clamp limit
.LC1:
|
||||
.long -32768
|
||||
.long -32768
|
||||
.long -32768
|
||||
.long -32768
|
||||
.align 16
|
||||
# .LC2: four 32-bit lanes of 32767, the upper clamp limit
.LC2:
|
||||
.long 32767
|
||||
.long 32767
|
||||
.long 32767
|
||||
.long 32767
|
||||
.align 16
|
||||
# .LC3: four 32-bit lanes of 0xFF00, the high-byte mask used by the byte-swap path
.LC3:
|
||||
.long 65280
|
||||
.long 65280
|
||||
.long 65280
|
||||
.long 65280
|
||||
.ident "GCC: (GNU) 4.4.0 20081110 (experimental)"
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
@@ -28,6 +28,7 @@
|
||||
#include <QtXml/QDomElement>
|
||||
|
||||
#include "fx_mixer.h"
|
||||
#include "basic_ops.h"
|
||||
#include "effect.h"
|
||||
#include "song.h"
|
||||
|
||||
@@ -38,7 +39,7 @@ fxChannel::fxChannel( model * _parent ) :
|
||||
m_stillRunning( false ),
|
||||
m_peakLeft( 0.0f ),
|
||||
m_peakRight( 0.0f ),
|
||||
m_buffer( new sampleFrame[engine::getMixer()->framesPerPeriod()] ),
|
||||
m_buffer( alignedAllocFrames( engine::getMixer()->framesPerPeriod() ) ),
|
||||
m_muteModel( false, _parent ),
|
||||
m_volumeModel( 1.0, 0.0, 2.0, 0.01, _parent ),
|
||||
m_name(),
|
||||
@@ -53,7 +54,7 @@ fxChannel::fxChannel( model * _parent ) :
|
||||
|
||||
fxChannel::~fxChannel()
|
||||
{
|
||||
delete[] m_buffer;
|
||||
alignedFreeFrames( m_buffer );
|
||||
}
|
||||
|
||||
|
||||
@@ -92,13 +93,7 @@ void fxMixer::mixToChannel( const sampleFrame * _buf, fx_ch_t _ch )
|
||||
if( m_fxChannels[_ch]->m_muteModel.value() == false )
|
||||
{
|
||||
m_fxChannels[_ch]->m_lock.lock();
|
||||
sampleFrame * buf = m_fxChannels[_ch]->m_buffer;
|
||||
for( f_cnt_t f = 0; f < engine::getMixer()->framesPerPeriod();
|
||||
++f )
|
||||
{
|
||||
buf[f][0] += _buf[f][0];
|
||||
buf[f][1] += _buf[f][1];
|
||||
}
|
||||
alignedBufMix( m_fxChannels[_ch]->m_buffer, _buf, engine::getMixer()->framesPerPeriod() );
|
||||
m_fxChannels[_ch]->m_used = true;
|
||||
m_fxChannels[_ch]->m_lock.unlock();
|
||||
}
|
||||
|
||||
@@ -57,6 +57,7 @@
|
||||
#include "main_window.h"
|
||||
#include "project_renderer.h"
|
||||
#include "song.h"
|
||||
#include "basic_ops.h"
|
||||
|
||||
#warning TODO: move somewhere else
|
||||
static inline QString baseName( const QString & _file )
|
||||
@@ -78,12 +79,29 @@ inline void loadTranslation( const QString & _tname,
|
||||
}
|
||||
|
||||
|
||||
Uint32 convertToS16( const sampleFrameA * RP _ab,
|
||||
const fpp_t _frames,
|
||||
const float _master_gain,
|
||||
intSampleFrameA * RP _output_buffer,
|
||||
const bool _convert_endian );
|
||||
|
||||
int main( int argc, char * * argv )
|
||||
{
|
||||
// initialize RNG
|
||||
srand( getpid() + time( 0 ) );
|
||||
|
||||
// init CPU specific optimized basic ops
|
||||
initBasicOps();
|
||||
|
||||
#if 0
|
||||
sampleFrameA * buf = (sampleFrameA *) alignedMalloc( sizeof( sampleFrameA ) * 256 );
|
||||
intSampleFrameA * obuf = (intSampleFrameA*)alignedMalloc( sizeof( intSampleFrameA ) * 256 );
|
||||
for( int i = 0; i< 1000000; ++i )
|
||||
{
|
||||
convertToS16( buf, 256, 0.7, obuf, false );
|
||||
}
|
||||
return 0;
|
||||
#endif
|
||||
bool core_only = FALSE;
|
||||
|
||||
for( int i = 1; i < argc; ++i )
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
#include "sample_play_handle.h"
|
||||
#include "piano_roll.h"
|
||||
#include "micro_timer.h"
|
||||
#include "basic_ops.h"
|
||||
|
||||
#include "audio_device.h"
|
||||
#include "midi_client.h"
|
||||
@@ -61,40 +62,15 @@
|
||||
#include "midi_winmm.h"
|
||||
#include "midi_dummy.h"
|
||||
|
||||
#ifdef LMMS_HAVE_PTHREAD_H
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
|
||||
static QVector<fx_ch_t> __fx_channel_jobs( NumFxChannels );
|
||||
|
||||
|
||||
|
||||
// Releases a block returned by aligned_malloc(). The int stored directly in
// front of the aligned address holds the distance back to the pointer that
// malloc() originally returned. A NULL argument is ignored.
static void aligned_free( void * _buf )
{
	if( _buf == NULL )
	{
		return;
	}

	// read the offset header that sits immediately before the user pointer
	const int offset = *( static_cast<int *>( _buf ) - 1 );

	// step back to the start of the underlying allocation and release it
	free( static_cast<char *>( _buf ) - offset );
}
|
||||
|
||||
static void * aligned_malloc( int _bytes )
|
||||
{
|
||||
char *ptr,*ptr2,*aligned_ptr;
|
||||
int align_mask = ALIGN_SIZE- 1;
|
||||
ptr=(char *)malloc(_bytes +ALIGN_SIZE+ sizeof(int));
|
||||
if(ptr==NULL) return(NULL);
|
||||
|
||||
ptr2 = ptr + sizeof(int);
|
||||
aligned_ptr = ptr2 + (ALIGN_SIZE- ((size_t)ptr2 & align_mask));
|
||||
|
||||
|
||||
ptr2 = aligned_ptr - sizeof(int);
|
||||
*((int *)ptr2)=(int)(aligned_ptr - ptr);
|
||||
|
||||
return(aligned_ptr);
|
||||
}
|
||||
|
||||
|
||||
|
||||
class mixerWorkerThread : public QThread
|
||||
{
|
||||
public:
|
||||
@@ -152,9 +128,7 @@ public:
|
||||
|
||||
mixerWorkerThread( int _worker_num, mixer * _mixer ) :
|
||||
QThread( _mixer ),
|
||||
m_workingBuf( (sampleFrame *) aligned_malloc(
|
||||
_mixer->framesPerPeriod() *
|
||||
sizeof( sampleFrame ) ) ),
|
||||
m_workingBuf( alignedAllocFrames( _mixer->framesPerPeriod() ) ),
|
||||
m_workerNum( _worker_num ),
|
||||
m_quit( false ),
|
||||
m_mixer( _mixer ),
|
||||
@@ -165,7 +139,7 @@ public:
|
||||
|
||||
virtual ~mixerWorkerThread()
|
||||
{
|
||||
aligned_free( m_workingBuf );
|
||||
alignedFreeFrames( m_workingBuf );
|
||||
}
|
||||
|
||||
virtual void quit( void )
|
||||
@@ -234,11 +208,11 @@ private:
|
||||
{
|
||||
#if 0
|
||||
#ifdef LMMS_BUILD_LINUX
|
||||
#ifdef LMMS_HAVE_SCHED_H
|
||||
#ifdef LMMS_HAVE_PTHREAD_H
|
||||
cpu_set_t mask;
|
||||
CPU_ZERO( &mask );
|
||||
CPU_SET( m_workerNum, &mask );
|
||||
sched_setaffinity( 0, sizeof( mask ), &mask );
|
||||
pthread_setaffinity_np( pthread_self(), sizeof( mask ), &mask );
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
@@ -310,7 +284,8 @@ mixer::mixer( void ) :
|
||||
{
|
||||
m_inputBufferFrames[i] = 0;
|
||||
m_inputBufferSize[i] = DEFAULT_BUFFER_SIZE * 100;
|
||||
m_inputBuffer[i] = new sampleFrame[ DEFAULT_BUFFER_SIZE * 100 ];
|
||||
m_inputBuffer[i] = alignedAllocFrames(
|
||||
DEFAULT_BUFFER_SIZE * 100 );
|
||||
clearAudioBuffer( m_inputBuffer[i], m_inputBufferSize[i] );
|
||||
}
|
||||
|
||||
@@ -351,14 +326,10 @@ mixer::mixer( void ) :
|
||||
m_fifo = new fifo( 1 );
|
||||
}
|
||||
|
||||
m_workingBuf = (sampleFrame*) aligned_malloc( m_framesPerPeriod *
|
||||
sizeof( sampleFrame ) );
|
||||
m_workingBuf = alignedAllocFrames( m_framesPerPeriod );
|
||||
for( Uint8 i = 0; i < 3; i++ )
|
||||
{
|
||||
m_readBuf = (surroundSampleFrame*)
|
||||
aligned_malloc( m_framesPerPeriod *
|
||||
sizeof( surroundSampleFrame ) );
|
||||
|
||||
m_readBuf = alignedAllocFrames( m_framesPerPeriod );
|
||||
clearAudioBuffer( m_readBuf, m_framesPerPeriod );
|
||||
m_bufferPool.push_back( m_readBuf );
|
||||
}
|
||||
@@ -409,10 +380,10 @@ mixer::~mixer()
|
||||
|
||||
for( Uint8 i = 0; i < 3; i++ )
|
||||
{
|
||||
aligned_free( m_bufferPool[i] );
|
||||
alignedFreeFrames( m_bufferPool[i] );
|
||||
}
|
||||
|
||||
aligned_free( m_workingBuf );
|
||||
alignedFreeFrames( m_workingBuf );
|
||||
}
|
||||
|
||||
|
||||
@@ -524,9 +495,9 @@ void mixer::pushInputFrames( sampleFrame * _ab, const f_cnt_t _frames )
|
||||
if( frames + _frames > size )
|
||||
{
|
||||
size = qMax( size * 2, frames + _frames );
|
||||
sampleFrame * ab = new sampleFrame[ size ];
|
||||
memcpy( ab, buf, frames * sizeof( sampleFrame ) );
|
||||
delete [] buf;
|
||||
sampleFrame * ab = alignedAllocFrames( size );
|
||||
alignedMemCpy( ab, buf, frames * sizeof( sampleFrame ) );
|
||||
alignedFreeFrames( buf );
|
||||
|
||||
m_inputBufferSize[ m_inputBufferWrite ] = size;
|
||||
m_inputBuffer[ m_inputBufferWrite ] = ab;
|
||||
@@ -534,7 +505,7 @@ void mixer::pushInputFrames( sampleFrame * _ab, const f_cnt_t _frames )
|
||||
buf = ab;
|
||||
}
|
||||
|
||||
memcpy( &buf[ frames ], _ab, _frames * sizeof( sampleFrame ) );
|
||||
alignedMemCpy( &buf[ frames ], _ab, _frames * sizeof( sampleFrame ) );
|
||||
m_inputBufferFrames[ m_inputBufferWrite ] += _frames;
|
||||
|
||||
unlockInputFrames();
|
||||
@@ -543,7 +514,7 @@ void mixer::pushInputFrames( sampleFrame * _ab, const f_cnt_t _frames )
|
||||
|
||||
|
||||
|
||||
const surroundSampleFrame * mixer::renderNextBuffer( void )
|
||||
sampleFrameA * mixer::renderNextBuffer( void )
|
||||
{
|
||||
microTimer timer;
|
||||
static song::playPos last_metro_pos = -1;
|
||||
@@ -709,12 +680,9 @@ void mixer::bufferToPort( const sampleFrame * _buf,
|
||||
const int loop1_frame = qMin<int>( end_frame, m_framesPerPeriod );
|
||||
|
||||
_port->lockFirstBuffer();
|
||||
sampleFrame * obuf = _port->firstBuffer()+start_frame;
|
||||
for( int frame = 0; frame < loop1_frame-start_frame; ++frame )
|
||||
{
|
||||
obuf[frame][0] += _buf[frame][0] * _vv.vol[0];
|
||||
obuf[frame][1] += _buf[frame][1] * _vv.vol[1];
|
||||
}
|
||||
unalignedBufMixLRCoeff( _port->firstBuffer() + start_frame,
|
||||
_buf, _vv.vol[0], _vv.vol[1],
|
||||
loop1_frame - start_frame );
|
||||
_port->unlockFirstBuffer();
|
||||
|
||||
_port->lockSecondBuffer();
|
||||
@@ -723,14 +691,10 @@ void mixer::bufferToPort( const sampleFrame * _buf,
|
||||
const int frames_done = m_framesPerPeriod - start_frame;
|
||||
end_frame -= m_framesPerPeriod;
|
||||
end_frame = qMin<int>( end_frame, m_framesPerPeriod );
|
||||
sampleFrame * obuf = _port->secondBuffer();
|
||||
for( fpp_t frame = 0; frame < end_frame; ++frame )
|
||||
{
|
||||
obuf[frame][0] += _buf[frames_done + frame][0] *
|
||||
_vv.vol[0];
|
||||
obuf[frame][1] += _buf[frames_done + frame][1] *
|
||||
_vv.vol[1];
|
||||
}
|
||||
unalignedBufMixLRCoeff( _port->secondBuffer(),
|
||||
_buf+frames_done,
|
||||
_vv.vol[0], _vv.vol[1],
|
||||
end_frame );
|
||||
// we used both buffers so set flags
|
||||
_port->m_bufferUsage = audioPort::BothBuffers;
|
||||
}
|
||||
@@ -748,7 +712,14 @@ void mixer::bufferToPort( const sampleFrame * _buf,
|
||||
// Zeroes _frames frames of _ab, starting at frame index _offset.
//
// The optimized alignedMemClear() is used only when the start address is a
// multiple of 16 bytes and the frame count is a multiple of 8; every other
// case falls back to memset(). The SSE/SSE2 clear routines use 16-byte
// aligned stores and only write whole 64-byte chunks, hence both conditions
// (presumably 8 frames == 64 bytes - confirm against sizeof( sampleFrame )).
void mixer::clearAudioBuffer( sampleFrame * _ab, const f_cnt_t _frames,
						const f_cnt_t _offset )
{
	sampleFrame * const start = _ab + _offset;
	const size_t bytes = sizeof( *_ab ) * _frames;

	// The address has to be tested as size_t: casting a pointer to int
	// truncates it where pointers are wider than int (e.g. x86_64) and is
	// rejected by g++ there ("cast ... loses precision").
	if( likely( (size_t) start % 16 == 0 && _frames % 8 == 0 ) )
	{
		alignedMemClear( start, bytes );
	}
	else
	{
		memset( start, 0, bytes );
	}
}
|
||||
|
||||
|
||||
@@ -1166,11 +1137,11 @@ void mixer::fifoWriter::run( void )
|
||||
{
|
||||
#if 0
|
||||
#ifdef LMMS_BUILD_LINUX
|
||||
#ifdef LMMS_HAVE_SCHED_H
|
||||
#ifdef LMMS_HAVE_PTHREAD_H
|
||||
cpu_set_t mask;
|
||||
CPU_ZERO( &mask );
|
||||
CPU_SET( 0, &mask );
|
||||
sched_setaffinity( 0, sizeof( mask ), &mask );
|
||||
pthread_setaffinity_np( pthread_self(), sizeof( mask ), &mask );
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
@@ -1178,9 +1149,9 @@ void mixer::fifoWriter::run( void )
|
||||
const fpp_t frames = m_mixer->framesPerPeriod();
|
||||
while( m_writing )
|
||||
{
|
||||
surroundSampleFrame * buffer = new surroundSampleFrame[frames];
|
||||
const surroundSampleFrame * b = m_mixer->renderNextBuffer();
|
||||
memcpy( buffer, b, frames * sizeof( surroundSampleFrame ) );
|
||||
sampleFrameA * buffer = alignedAllocFrames( frames );
|
||||
const sampleFrameA * b = m_mixer->renderNextBuffer();
|
||||
alignedMemCpy( buffer, b, frames * sizeof( sampleFrameA ) );
|
||||
m_fifo->write( buffer );
|
||||
}
|
||||
|
||||
|
||||
@@ -32,11 +32,12 @@
|
||||
#include "audio_file_wave.h"
|
||||
#include "audio_file_ogg.h"
|
||||
|
||||
#ifdef LMMS_HAVE_SCHED_H
|
||||
#include <sched.h>
|
||||
#ifdef LMMS_HAVE_PTHREAD_H
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
fileEncodeDevice __fileEncodeDevices[] =
|
||||
{
|
||||
|
||||
@@ -148,11 +149,11 @@ void projectRenderer::run( void )
|
||||
{
|
||||
#if 0
|
||||
#ifdef LMMS_BUILD_LINUX
|
||||
#ifdef LMMS_HAVE_SCHED_H
|
||||
#ifdef LMMS_HAVE_PTHREAD_H
|
||||
cpu_set_t mask;
|
||||
CPU_ZERO( &mask );
|
||||
CPU_SET( 0, &mask );
|
||||
sched_setaffinity( 0, sizeof( mask ), &mask );
|
||||
pthread_setaffinity_np( pthread_self(), sizeof( mask ), &mask );
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user