#454: one-pass SIMD decoder

Updated the SIMD decoder, with support for end-of-stream detection.
This commit is contained in:
Andrey Prygunkov
2017-10-19 18:27:04 +02:00
parent 35fca1479c
commit c59ab2d9dc
16 changed files with 925 additions and 854 deletions

View File

@@ -31,28 +31,19 @@
namespace YEncode
{
size_t (*decode)(const unsigned char*, unsigned char*, size_t, char* state) = nullptr;
int (*decode)(const unsigned char**, unsigned char**, size_t, YencDecoderState*) = nullptr;
extern void init_decode_scalar();
bool decode_simd = false;
void (*crc_init)(crc_state *const s) = nullptr;
void (*crc_incr)(crc_state *const s, const unsigned char *src, long len) = nullptr;
uint32_t (*crc_finish)(crc_state *const s) = nullptr;
extern void init_crc_slice();
bool crc_simd = false;
void crc_slice_init(crc_state *const s);
void crc_slice(crc_state *const s, const unsigned char *src, long len);
uint32_t crc_slice_finish(crc_state *const s);
#if defined(__i686__) || defined(__amd64__)
size_t (*decode_sse2)(const unsigned char* src, unsigned char* dest, size_t len, char* state) = nullptr;
extern void init_decode_sse2();
size_t (*decode_ssse3)(const unsigned char* src, unsigned char* dest, size_t len, char* state) = nullptr;
extern void init_decode_ssse3();
void (*crc_init_pclmul)(crc_state *const s) = nullptr;
void (*crc_incr_pclmul)(crc_state *const s, const unsigned char *src, long len) = nullptr;
uint32_t (*crc_finish_pclmul)(crc_state *const s) = nullptr;
extern void init_crc_pclmul();
class CpuId
@@ -75,21 +66,14 @@ public:
#endif
#if defined(__arm__) || defined(__aarch64__)
size_t (*decode_neon)(const unsigned char* src, unsigned char* dest, size_t len, char* state) = nullptr;
extern void init_decode_neon();
void (*crc_init_acle)(crc_state *const s) = nullptr;
void (*crc_incr_acle)(crc_state *const s, const unsigned char *src, long len) = nullptr;
uint32_t (*crc_finish_acle)(crc_state *const s) = nullptr;
extern void init_crc_acle();
#endif
void init()
{
decode = &decode_scalar;
crc_init = &crc_slice_init;
crc_incr = &crc_slice;
crc_finish = &crc_slice_finish;
init_decode_scalar();
init_crc_slice();
#if defined(__i686__) || defined(__amd64__)
CpuId cpuid(1);
@@ -102,31 +86,14 @@ void init()
if (cpu_supports_sse2)
{
init_decode_sse2();
if (decode_sse2)
{
decode = decode_sse2;
decode_simd = true;
}
}
if (cpu_supports_ssse3)
{
init_decode_ssse3();
if (decode_ssse3)
{
decode = decode_ssse3;
decode_simd = true;
}
}
if (cpu_supports_sse41 && cpu_supports_pclmul)
{
init_crc_pclmul();
if (crc_init_pclmul && crc_incr_pclmul && crc_finish_pclmul)
{
crc_init = crc_init_pclmul;
crc_incr = crc_incr_pclmul;
crc_finish = crc_finish_pclmul;
crc_simd = true;
}
}
#endif
@@ -151,22 +118,10 @@ void init()
if (cpu_supports_neon)
{
init_decode_neon();
if (decode_neon)
{
decode = decode_neon;
decode_simd = true;
}
}
if (cpu_supports_crc)
{
init_crc_acle();
if (crc_init_acle && crc_incr_acle && crc_finish_acle)
{
crc_init = crc_init_acle;
crc_incr = crc_incr_acle;
crc_finish = crc_finish_acle;
crc_simd = true;
}
}
#endif
}