mirror of
https://github.com/LMMS/lmms.git
synced 2026-03-10 18:19:42 -04:00
The new CPU framework replaces the old BasicOps framework. It is more flexible and the build process isn't such a mess anymore (pre-compiled assembler files etc.). It will hopefully see some improvements and extensions soon. Signed-off-by: Tobias Doerffel <tobias.doerffel@gmail.com>
471 lines
10 KiB
C++
471 lines
10 KiB
C++
/*
|
|
* Cpu.cpp - CPU specific accellerated operations
|
|
*
|
|
* Copyright (c) 2008-2009 Tobias Doerffel <tobydox/at/users.sourceforge.net>
|
|
*
|
|
* This file is part of Linux MultiMedia Studio - http://lmms.sourceforge.net
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public
|
|
* License along with this program (see COPYING); if not, write to the
|
|
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
* Boston, MA 02110-1301 USA.
|
|
*
|
|
*/
|
|
|
|
|
|
#include "Cpu.h"
|
|
|
|
#include <cstdlib>
|
|
#include <cstdio>
|
|
#include <memory.h>
|
|
|
|
|
|
namespace CPU
|
|
{
|
|
|
|
void * memAlloc( int _bytes )
|
|
{
|
|
char *ptr,*ptr2,*_ptr;
|
|
int align_mask = ALIGN_SIZE- 1;
|
|
ptr =(char *) malloc( _bytes + ALIGN_SIZE + sizeof(int) );
|
|
if( ptr == NULL )
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
ptr2 = ptr + sizeof(int);
|
|
_ptr = ptr2 + ( ALIGN_SIZE- ( (size_t) ptr2 & align_mask ) );
|
|
|
|
|
|
ptr2 = _ptr - sizeof(int);
|
|
*((int *) ptr2) = (int)( _ptr - ptr );
|
|
|
|
return _ptr;
|
|
}
|
|
|
|
|
|
|
|
|
|
void memFree( void * _buf )
|
|
{
|
|
if( _buf )
|
|
{
|
|
int * ptr2 = (int *) _buf - 1;
|
|
void * buf2 = (char *) _buf - *ptr2;
|
|
if( buf2 )
|
|
{
|
|
free( buf2 );
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
sampleFrameA * allocFrames( int _n )
|
|
{
|
|
return (sampleFrameA *) memAlloc( _n * sizeof( sampleFrameA ) );
|
|
}
|
|
|
|
|
|
|
|
|
|
void freeFrames( sampleFrame * _buf )
|
|
{
|
|
memFree( _buf );
|
|
}
|
|
|
|
|
|
|
|
|
|
// slow fallback
|
|
void memCpyNoOpt( void * RP _dst, const void * RP _src, int _size )
|
|
{
|
|
const int s = _size / sizeof( int );
|
|
const int * RP src = (const int *) _src;
|
|
int * RP dst = (int *) _dst;
|
|
for( int i = 0; i < s; )
|
|
{
|
|
dst[i+0] = src[i+0];
|
|
dst[i+1] = src[i+1];
|
|
dst[i+2] = src[i+2];
|
|
dst[i+3] = src[i+3];
|
|
dst[i+4] = src[i+4];
|
|
dst[i+5] = src[i+5];
|
|
dst[i+6] = src[i+6];
|
|
dst[i+7] = src[i+7];
|
|
dst[i+8] = src[i+8];
|
|
dst[i+9] = src[i+9];
|
|
dst[i+10] = src[i+10];
|
|
dst[i+11] = src[i+11];
|
|
dst[i+12] = src[i+12];
|
|
dst[i+13] = src[i+13];
|
|
dst[i+14] = src[i+14];
|
|
dst[i+15] = src[i+15];
|
|
i += 16;
|
|
}
|
|
}
|
|
|
|
|
|
// slow fallback
|
|
void memClearNoOpt( void * _dst, int _size )
|
|
{
|
|
const int s = _size / ( sizeof( int ) * 4 );
|
|
int * dst = (int *) _dst;
|
|
for( int i = 0; i < s; ++i )
|
|
{
|
|
dst[0] = 0;
|
|
dst[1] = 0;
|
|
dst[2] = 0;
|
|
dst[3] = 0;
|
|
dst += 4;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void bufApplyGainNoOpt( sampleFrameA * RP _dst, float _gain,
|
|
int _frames )
|
|
{
|
|
for( int i = 0; i < _frames; )
|
|
{
|
|
_dst[i+0][0] *= _gain;
|
|
_dst[i+0][1] *= _gain;
|
|
_dst[i+1][0] *= _gain;
|
|
_dst[i+1][1] *= _gain;
|
|
_dst[i+2][0] *= _gain;
|
|
_dst[i+2][1] *= _gain;
|
|
_dst[i+3][0] *= _gain;
|
|
_dst[i+3][1] *= _gain;
|
|
_dst[i+4][0] *= _gain;
|
|
_dst[i+4][1] *= _gain;
|
|
_dst[i+5][0] *= _gain;
|
|
_dst[i+5][1] *= _gain;
|
|
_dst[i+6][0] *= _gain;
|
|
_dst[i+6][1] *= _gain;
|
|
_dst[i+7][0] *= _gain;
|
|
_dst[i+7][1] *= _gain;
|
|
i += 8;
|
|
}
|
|
}
|
|
|
|
|
|
void bufMixNoOpt( sampleFrameA * RP _dst, const sampleFrameA * RP _src,
|
|
int _frames )
|
|
{
|
|
for( int i = 0; i < _frames; )
|
|
{
|
|
_dst[i+0][0] += _src[i+0][0];
|
|
_dst[i+0][1] += _src[i+0][1];
|
|
_dst[i+1][0] += _src[i+1][0];
|
|
_dst[i+1][1] += _src[i+1][1];
|
|
_dst[i+2][0] += _src[i+2][0];
|
|
_dst[i+2][1] += _src[i+2][1];
|
|
_dst[i+3][0] += _src[i+3][0];
|
|
_dst[i+3][1] += _src[i+3][1];
|
|
i += 4;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void bufMixLRCoeffNoOpt( sampleFrameA * RP _dst,
|
|
const sampleFrameA * RP _src,
|
|
float _left, float _right, int _frames )
|
|
{
|
|
for( int i = 0; i < _frames; )
|
|
{
|
|
_dst[i+0][0] += _src[i+0][0]*_left;
|
|
_dst[i+0][1] += _src[i+0][1]*_right;
|
|
_dst[i+1][0] += _src[i+1][0]*_left;
|
|
_dst[i+1][1] += _src[i+1][1]*_right;
|
|
_dst[i+2][0] += _src[i+2][0]*_left;
|
|
_dst[i+2][1] += _src[i+2][1]*_right;
|
|
_dst[i+3][0] += _src[i+3][0]*_left;
|
|
_dst[i+3][1] += _src[i+3][1]*_right;
|
|
i += 4;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void unalignedBufMixLRCoeffNoOpt( sampleFrame * RP _dst,
|
|
const sampleFrame * RP _src,
|
|
const float _left,
|
|
const float _right,
|
|
int _frames )
|
|
{
|
|
if( _frames % 2 )
|
|
{
|
|
_dst[0][0] += _src[0][0] * _left;
|
|
_dst[0][1] += _src[0][1] * _right;
|
|
++_src;
|
|
++_dst;
|
|
--_frames;
|
|
}
|
|
for( int i = 0; i < _frames; )
|
|
{
|
|
_dst[i+0][0] += _src[i+0][0]*_left;
|
|
_dst[i+0][1] += _src[i+0][1]*_right;
|
|
_dst[i+1][0] += _src[i+1][0]*_left;
|
|
_dst[i+1][1] += _src[i+1][1]*_right;
|
|
i += 2;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void bufWetDryMixNoOpt( sampleFrameA * RP _dst,
|
|
const sampleFrameA * RP _src,
|
|
float _wet, float _dry, int _frames )
|
|
{
|
|
for( int i = 0; i < _frames; ++i )
|
|
{
|
|
_dst[i+0][0] = _dst[i+0][0]*_dry + _src[i+0][0]*_wet;
|
|
_dst[i+0][1] = _dst[i+0][1]*_dry + _src[i+0][1]*_wet;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
void bufWetDryMixSplittedNoOpt( sampleFrameA * RP _dst,
|
|
const float * RP _left,
|
|
const float * RP _right,
|
|
float _wet, float _dry, int _frames )
|
|
{
|
|
int i;
|
|
for( i = 0; i < _frames; ++i )
|
|
{
|
|
_dst[i+0][0] = _dst[i+0][0]*_dry + _left[i+0]*_wet;
|
|
_dst[i+0][1] = _dst[i+0][1]*_dry + _right[i+0]*_wet;
|
|
++i;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
int convertToS16NoOpt( const sampleFrameA * RP _src,
|
|
intSampleFrameA * RP _dst,
|
|
const fpp_t _frames,
|
|
const float _master_gain,
|
|
const bool _convert_endian )
|
|
{
|
|
int t1;
|
|
int t2;
|
|
const float f = _master_gain * OUTPUT_SAMPLE_MULTIPLIER;
|
|
if( _convert_endian )
|
|
{
|
|
for( fpp_t frame = 0; frame < _frames; ++frame )
|
|
{
|
|
t1 = _src[frame][0] * f;
|
|
t1 = unlikely( t1 > 32767 ) ? 32767 : t1;
|
|
t1 = unlikely( t1 < -32768 ) ? -32768 : t1;
|
|
_dst[frame][0] = ( t1 & 0x00ff) << 8 |
|
|
( t1 & 0xff00 ) >> 8;
|
|
|
|
t2 = _src[frame][1] * f;
|
|
t2 = unlikely( t2 > 32767 ) ? 32767 : t2;
|
|
t2 = unlikely( t2 < -32768 ) ? -32768 : t2;
|
|
_dst[frame][1] = ( t2 & 0x00ff) << 8 |
|
|
( t2 & 0xff00 ) >> 8;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for( fpp_t frame = 0; frame < _frames; ++frame )
|
|
{
|
|
t1 = _src[frame][0] * f;
|
|
t1 = unlikely( t1 > 32767 ) ? 32767 : t1;
|
|
t1 = unlikely( t1 < -32768 ) ? -32768 : t1;
|
|
_dst[frame][0] = t1;
|
|
|
|
t2 = _src[frame][1] * f;
|
|
t2 = unlikely( t2 > 32767 ) ? 32767 : t2;
|
|
t2 = unlikely( t2 < -32768 ) ? -32768 : t2;
|
|
_dst[frame][1] = t2;
|
|
}
|
|
}
|
|
|
|
return _frames * DEFAULT_CHANNELS * BYTES_PER_INT_SAMPLE;
|
|
}
|
|
|
|
|
|
MemCpyFunc memCpy = memCpyNoOpt;
|
|
MemClearFunc memClear = memClearNoOpt;
|
|
BufApplyGainFunc bufApplyGain = bufApplyGainNoOpt;
|
|
BufMixFunc bufMix = bufMixNoOpt;
|
|
BufMixLRCoeffFunc bufMixLRCoeff = bufMixLRCoeffNoOpt;
|
|
UnalignedBufMixLRCoeffFunc unalignedBufMixLRCoeff = unalignedBufMixLRCoeffNoOpt;
|
|
BufWetDryMixFunc bufWetDryMix = bufWetDryMixNoOpt;
|
|
BufWetDryMixSplittedFunc bufWetDryMixSplitted = bufWetDryMixSplittedNoOpt;
|
|
ConvertToS16Func convertToS16 = convertToS16NoOpt;
|
|
|
|
|
|
#ifdef X86_OPTIMIZATIONS
|
|
enum CPUFeatures
|
|
{
|
|
None = 0,
|
|
MMX = 0x1,
|
|
MMXEXT = 0x2,
|
|
MMX3DNOW = 0x4,
|
|
MMX3DNOWEXT = 0x8,
|
|
SSE = 0x10,
|
|
SSE2 = 0x20,
|
|
CMOV = 0x40,
|
|
IWMMXT = 0x80
|
|
};
|
|
|
|
extern "C"
|
|
{
|
|
#ifdef LMMS_HOST_X86
|
|
void memCpyMMX( void * RP _dst, const void * RP _src, int _size );
|
|
void memClearMMX( void * RP _dst, int _size );
|
|
#endif
|
|
void memCpySSE( void * RP _dst, const void * RP _src, int _size );
|
|
void memClearSSE( void * RP _dst, int _size );
|
|
void bufApplyGainSSE( sampleFrameA * RP _dst, float _gain, int _frames );
|
|
void bufMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, int _frames );
|
|
void bufMixLRCoeffSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _left, float _right, int _frames );
|
|
void unalignedBufMixLRCoeffSSE( sampleFrame * RP _dst, const sampleFrame * RP _src, const float _left, const float _right, int _frames );
|
|
void bufWetDryMixSSE( sampleFrameA * RP _dst, const sampleFrameA * RP _src, float _wet, float _dry, int _frames );
|
|
void bufWetDryMixSplittedSSE( sampleFrameA * RP _dst, const float * RP _left, const float * RP _right, float _wet, float _dry, int _frames );
|
|
#ifdef X86_OPTIMIZATIONS
|
|
void memCpySSE2( void * RP _dst, const void * RP _src, int _size );
|
|
void memClearSSE2( void * RP _dst, int _size );
|
|
int convertToS16SSE2( const sampleFrameA * RP _src, intSampleFrameA * RP _dst, const fpp_t _frames, const float _master_gain, const bool _convert_endian );
|
|
#endif
|
|
} ;
|
|
#endif
|
|
|
|
|
|
|
|
void init()
|
|
{
|
|
#ifdef X86_OPTIMIZATIONS
|
|
static bool extensions_checked = false;
|
|
if( extensions_checked == false )
|
|
{
|
|
int features = 0;
|
|
#if defined(__x86_64__) || defined(Q_OS_WIN64)
|
|
features = MMX | SSE | SSE2 | CMOV;
|
|
#elif defined(__ia64__)
|
|
features = MMX | SSE | SSE2;
|
|
#else
|
|
unsigned int result = 0;
|
|
unsigned int extended_result = 0;
|
|
asm( "push %%ebx\n"
|
|
"pushf\n"
|
|
"pop %%eax\n"
|
|
"mov %%eax, %%ebx\n"
|
|
"xor $0x00200000, %%eax\n"
|
|
"push %%eax\n"
|
|
"popf\n"
|
|
"pushf\n"
|
|
"pop %%eax\n"
|
|
"xor %%edx, %%edx\n"
|
|
"xor %%ebx, %%eax\n"
|
|
"jz 1f\n"
|
|
|
|
"mov $0x00000001, %%eax\n"
|
|
"cpuid\n"
|
|
"1:\n"
|
|
"pop %%ebx\n"
|
|
"mov %%edx, %0\n"
|
|
|
|
: "=r" (result)
|
|
:
|
|
: "%eax", "%ecx", "%edx"
|
|
);
|
|
|
|
asm( "push %%ebx\n"
|
|
"pushf\n"
|
|
"pop %%eax\n"
|
|
"mov %%eax, %%ebx\n"
|
|
"xor $0x00200000, %%eax\n"
|
|
"push %%eax\n"
|
|
"popf\n"
|
|
"pushf\n"
|
|
"pop %%eax\n"
|
|
"xor %%edx, %%edx\n"
|
|
"xor %%ebx, %%eax\n"
|
|
"jz 2f\n"
|
|
|
|
"mov $0x80000000, %%eax\n"
|
|
"cpuid\n"
|
|
"cmp $0x80000000, %%eax\n"
|
|
"jbe 2f\n"
|
|
"mov $0x80000001, %%eax\n"
|
|
"cpuid\n"
|
|
"2:\n"
|
|
"pop %%ebx\n"
|
|
"mov %%edx, %0\n"
|
|
|
|
: "=r" (extended_result)
|
|
:
|
|
: "%eax", "%ecx", "%edx"
|
|
);
|
|
|
|
if( result & (1u << 15) )
|
|
features |= CMOV;
|
|
if( result & (1u << 23) )
|
|
features |= MMX;
|
|
if( extended_result & (1u << 22) )
|
|
features |= MMXEXT;
|
|
if( extended_result & (1u << 31) )
|
|
features |= MMX3DNOW;
|
|
if( extended_result & (1u << 30) )
|
|
features |= MMX3DNOWEXT;
|
|
if( result & (1u << 25) )
|
|
features |= SSE;
|
|
if( result & (1u << 26) )
|
|
features |= SSE2;
|
|
#endif
|
|
|
|
#ifdef LMMS_HOST_X86
|
|
if( features & MMX )
|
|
{
|
|
memCpy = memCpyMMX;
|
|
memClear = memClearMMX;
|
|
}
|
|
#endif
|
|
if( features & SSE )
|
|
{
|
|
fprintf( stderr, "Using SSE optimized routines\n" );
|
|
memCpy = memCpySSE;
|
|
memClear = memClearSSE;
|
|
bufApplyGain = bufApplyGainSSE;
|
|
bufMix = bufMixSSE;
|
|
bufMixLRCoeff = bufMixLRCoeffSSE;
|
|
unalignedBufMixLRCoeff = unalignedBufMixLRCoeffSSE;
|
|
bufWetDryMix = bufWetDryMixSSE;
|
|
bufWetDryMixSplitted =
|
|
bufWetDryMixSplittedSSE;
|
|
}
|
|
if( features & SSE2 )
|
|
{
|
|
fprintf( stderr, "Using SSE2 optimized routines\n" );
|
|
memCpy = memCpySSE2;
|
|
memClear = memClearSSE2;
|
|
convertToS16 = convertToS16SSE2;
|
|
}
|
|
extensions_checked = true;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
}
|
|
|